library(ggplot2)
library(vegan)
library(colorRamps)
library(psych)
library(tidyr)
library(readxl)
library(ape)
library(ggplot2)
library(colorRamps)
library(ggrepel)

rm(list = ls())
load("data/CForBio.data.prep.2022.11.22.RData")

library(vegan)
cutoff <- 1000

sort(colSums(bac.amp1))
##  BD1623  DL1904    NG14  XS0601 BTM1515  GT0910     NG7  BD1512  TT1419  CB2424 
##   24903   26972   27053   27234   29143   31098   33452   34090   34339   34770 
##  GT2515  TT0302 DHS0824  HS1433  HS1531  TT1201  HS2325 BTM2317  GT0206 DHS0911 
##   34826   35210   35306   35369   35611   35751   36395   36829   36923   37867 
##    LS11 DHS0808 BTM1323     NG8  CB2009  XS0809  DL0810     LS9  DL0906  XS2022 
##   39954   41573   42171   42958   43395   45875   45977   46174   46676   47364 
##     GH1     GH4  BD1203    LS15  CB0122     GH2 
##   48535   54597   55685   60392   65051   65829
sort(colSums(bac.mgm1))
##   HS1433   BD1623      NG7   XS2022  DHS0824  DHS0808   DL1904  BTM1323 
##  9115574  9597402  9667544  9737388  9875341 10013817 10016208 10110020 
##   TT1419   HS2325   GT0910   DL0810   GT2515   GT0206   TT0302      NG8 
## 10160188 10164701 10195067 10301253 10395324 10534807 10643744 10648853 
##   XS0809   HS1531     LS11   CB2424  BTM2317   XS0601      GH1   DL0906 
## 10675545 10684100 10970718 11115704 11143274 11320447 11461977 11544870 
##   BD1512      GH2     NG14  DHS0911   CB0122   CB2009   BD1203   TT1201 
## 11738899 11750163 11847992 11987853 12237915 12559810 12632900 12911304 
##      LS9     LS15      GH4  BTM1515 
## 13903753 14521553 14922282 15939544
sort(colSums(ko.mgm1))
##   HS1433   HS1531   BD1512   HS2325  DHS0911   BD1203   CB0122   XS2022 
## 516612.0 554923.2 561838.9 564412.5 570647.8 572166.7 578939.1 581887.7 
##   BD1623   TT1419      GH4   XS0809   CB2009   TT0302  DHS0824   GT0206 
## 585609.5 588950.4 590928.8 591823.2 595951.2 597472.6 597523.9 605316.2 
##   GT0910  DHS0808   XS0601   TT1201      LS9     LS15      NG8   GT2515 
## 607017.0 611526.6 612949.0 618809.8 618974.8 621618.7 623901.4 624726.0 
##     NG14      NG7  BTM1323  BTM1515     LS11  BTM2317   CB2424      GH2 
## 626956.5 630290.3 640564.2 640670.3 653233.2 657988.6 661383.3 664885.4 
##      GH1   DL0906   DL0810   DL1904 
## 670864.8 682216.2 709302.7 712869.3
sort(colSums(cog.mgm1))
##   HS1433   BD1512   CB0122  DHS0911   HS1531   BD1623   HS2325      GH4 
## 665137.6 700954.6 701771.9 706113.6 707706.2 717680.3 719426.8 719907.9 
##   BD1203   TT1419   XS2022   CB2009   XS0809      NG8   TT0302      LS9 
## 720810.2 735415.6 736115.9 736728.3 737294.4 738141.8 741127.0 741129.8 
##  DHS0824     NG14   GT0206      NG7     LS15   GT0910  DHS0808   XS0601 
## 742211.5 745558.0 745871.5 746178.6 747159.8 748678.5 749599.8 753834.3 
##  BTM2317  BTM1323  BTM1515   TT1201     LS11   GT2515   CB2424   DL0906 
## 761668.5 762288.9 766129.1 769869.2 772483.2 780307.3 785374.0 785610.6 
##      GH2      GH1   DL1904   DL0810 
## 806012.5 810165.7 822934.2 825360.4
sort(colSums(rgi.mgm1))
##      GH4   CB0122   CB2009      NG8   BD1623      NG7     NG14   DL0906 
## 20995.21 23022.58 24293.34 24840.47 25081.28 25099.30 25294.91 25324.26 
##   BD1512     LS15   HS1433      LS9   TT1419     LS11   BD1203  BTM2317 
## 25426.07 25842.94 25856.23 25965.01 26532.75 26701.09 26808.88 27092.64 
##  DHS0911   TT0302   XS0601   XS0809   CB2424   XS2022   GT0206  BTM1515 
## 27105.60 27138.77 27223.78 27593.08 27629.52 27753.26 27775.72 27778.20 
##   GT0910   TT1201      GH2   DL0810   DL1904   HS2325  DHS0824  DHS0808 
## 28043.34 28054.02 28314.02 28403.45 28474.85 28496.89 28701.60 28718.88 
##      GH1   HS1531  BTM1323   GT2515 
## 28730.37 29012.93 29564.26 30208.16
sort(colSums(cazy.mgm1))
##   BD1623      GH4   BD1512      NG8      NG7     NG14   CB0122   XS0601 
## 13535.55 13791.28 14072.59 14476.19 14543.41 14716.72 14718.56 14858.62 
##   CB2009   BD1203   GT0910   GT0206   DL0906   CB2424      LS9     LS15 
## 15158.24 15372.90 15421.68 15423.15 15490.17 15512.52 15621.44 15661.32 
##     LS11  BTM2317   XS0809   HS1433   HS2325   XS2022   DL1904   DL0810 
## 15742.96 15933.04 16022.71 16071.16 16630.05 16715.92 16996.28 17006.50 
##      GH2   GT2515  DHS0911      GH1   HS1531  BTM1515   TT1419  DHS0824 
## 17012.91 17096.55 17106.44 17144.00 17227.44 17361.87 17600.44 17729.42 
##  BTM1323   TT0302   TT1201  DHS0808 
## 17929.15 18253.39 18461.59 18792.74
bac.amp <- rrarefy(t(bac.amp1),sample = 24903 )
bac.mgm <- data.frame(t(bac.mgm1))

ko.mgm <- data.frame(t(ko.mgm1))
cog.mgm <- data.frame(t(cog.mgm1))
rgi.mgm <- data.frame(t(rgi.mgm1))
cazy.mgm <- data.frame(t(cazy.mgm1))

env.amp1$rgi.mgm.H <- vegan::diversity(rgi.mgm)
env.amp1$cazy.mgm.H <- vegan::diversity(cazy.mgm)
env.amp1$ko.mgm.H <- vegan::diversity(ko.mgm)
env.amp1$cog.mgm.H <- vegan::diversity(cog.mgm)
env.amp1$bac.mgm.H <- vegan::diversity(bac.mgm)
env.amp1$bac.amp.H <- vegan::diversity(bac.amp)

env.amp1$rgi.mgm.S <- vegan::specnumber(rgi.mgm)
env.amp1$cazy.mgm.S <- vegan::specnumber(cazy.mgm)
env.amp1$ko.mgm.S <- vegan::specnumber(ko.mgm)
env.amp1$cog.mgm.S <- vegan::specnumber(cog.mgm)
env.amp1$bac.mgm.S <- vegan::specnumber(bac.mgm)
env.amp1$bac.amp.S <- vegan::specnumber(bac.amp)

env.amp1$cog.mgm.sum <- rowSums(cog.mgm)
env.amp1$rgi.mgm.sum <- rowSums(rgi.mgm)
env.amp1$cazy.mgm.sum <- rowSums(cazy.mgm)
env.amp1$ko.mgm.sum <- rowSums(ko.mgm)
env.amp1$bac.mgm.sum <- rowSums(bac.mgm)

#genomeTraits
genomeTrait<-read_excel("data/genomeTrait/genomesize.contigsprok/genomesize_novirus/all_genomesize.novirus.xlsx",sheet = "Sheet2")

genomeTrait$sample_name == env.amp1$sample_name
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE
env.genomeTraits<-cbind(env.amp1,genomeTrait[,c(2:8)])
#Supplementary Fig. 1
#ordination
library(ape)
library(ggplot2)
library(colorRamps)

#bac.amplicon
#calculate distance matrix
set.seed(315)
bac.amp.dist<-vegdist(bac.amp,method = 'bray')

#cmdscale function
bac.amp.pc<-cmdscale(bac.amp.dist,k=(nrow(bac.amp)-1),eig = TRUE)
eig<-bac.amp.pc$eig
bac.amp.pc1<-(eig[1]*100/sum(eig))
bac.amp.pc2<-(eig[2]*100/sum(eig))

#subset environmental data table
env.sub <- env.amp1[, c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe", "AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich")]; 
colnames(env.sub) <-  c("Latitude","Longitude","Altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe", "AK","C_N","C_P", "N_P", "Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness")

#envfit analysis
ef<-envfit(bac.amp.pc,env.sub,permutations=999,na.rm=TRUE)
ef
## 
## ***VECTORS
## 
##                       Dim1     Dim2     r2 Pr(>r)    
## Latitude           0.64434 -0.76474 0.7235  0.001 ***
## Longitude          0.49235 -0.87040 0.2699  0.004 ** 
## Altitude           0.64390 -0.76511 0.2947  0.006 ** 
## MAP               -0.92840  0.37159 0.7042  0.001 ***
## MAT               -0.63470  0.77276 0.6747  0.001 ***
## TC                 0.43316 -0.90132 0.3550  0.003 ** 
## TN                 0.89899 -0.43797 0.2195  0.027 *  
## TP                 0.99948 -0.03216 0.5093  0.001 ***
## pH                 0.65402  0.75648 0.7467  0.001 ***
## ACa                0.88251  0.47029 0.6914  0.001 ***
## AMg                0.64494  0.76423 0.6420  0.001 ***
## AFe                0.34361  0.93911 0.3927  0.001 ***
## AK                -0.32032 -0.94731 0.1072  0.150    
## C_N               -0.03667 -0.99933 0.4605  0.001 ***
## C_P               -0.91840 -0.39565 0.5110  0.001 ***
## N_P               -0.99449 -0.10488 0.5432  0.001 ***
## Soil bulk density -0.57728 -0.81655 0.2642  0.006 ** 
## Plant abundance   -0.91311  0.40772 0.0973  0.181    
## Plant basal area  -0.09837 -0.99515 0.1916  0.035 *  
## Plant richness    -0.85613  0.51676 0.5796  0.001 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
#extract scores and add color var
pcoa.scores.bact.amp<-data.frame(bac.amp.pc$points)[1:35]
pcoa.scores.bact.amp$Site.latitude<-env.amp1$site.latitude
m1<-max(abs(pcoa.scores.bact.amp$X1)); m2<-max(abs(pcoa.scores.bact.amp$X2))

#extract envfit result
en_coord_cont.bact.amp<-as.data.frame(scores(ef,"vectors"))*ordiArrowMul(ef)
en_coord_cont.bact.amp
##                          Dim1        Dim2
## Latitude           0.56013924 -0.66481177
## Longitude          0.26144377 -0.46219132
## Altitude           0.35722591 -0.42447430
## MAP               -0.79626246  0.31870204
## MAT               -0.53284655  0.64875262
## TC                 0.26377208 -0.54885284
## TN                 0.43044578 -0.20970404
## TP                 0.72899299 -0.02346002
## pH                 0.57761811  0.66810497
## ACa                0.75000000  0.39967560
## AMg                0.52813385  0.62582498
## AFe                0.22006155  0.60145287
## AK                -0.10720886 -0.31706334
## C_N               -0.02543437 -0.69306637
## C_P               -0.67095480 -0.28904619
## N_P               -0.74912718 -0.07900155
## Soil bulk density -0.30328553 -0.42898694
## Plant abundance   -0.29113451  0.12999647
## Plant basal area  -0.04401280 -0.44525338
## Plant richness    -0.66616579  0.40209693
en_coord_cont.bact.amp$envfactor<-rownames(en_coord_cont.bact.amp)

p.bact = ggplot(data = pcoa.scores.bact.amp, aes(x = X1, y = X2)) + 
  geom_point(size=5,mapping = aes(color=Site.latitude))+
  scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"))+
  geom_segment(aes(x = 0, y = 0, xend = Dim1*m1, yend = Dim2*m1),arrow = arrow(length = unit(0.02, "npc")),
               data = en_coord_cont.bact.amp, size =0.6, colour = "grey")+
  geom_text_repel(data = en_coord_cont.bact.amp, aes(x=Dim1*m1,y=Dim2*m1,label = envfactor),
                  colour="black",size=3)+
  ggtitle("Bacteria.16S.Amplicon")+
  labs(x = sprintf("PCo1 (%.1f%%)", bac.amp.pc1), y = sprintf("PCo2 (%.1f%%)", bac.amp.pc2))+
  theme_bw()+theme(axis.text.x = element_text(size = 12,color = "black"),axis.title.x = element_text(size = 12,color = "black"),
                   axis.text.y = element_text(size = 12,color = "black",),axis.title.y = element_text(size = 12,color = "black"),
                   legend.title = element_text(size=12))

p.bact

#ggsave("pdf2/Fig.S5a_2.PcoA.bact.composition.2023.08.24.pdf", width = 6, height = 4)

#bac.mgm
set.seed(315)
bac.mgm.dist<-vegdist(bac.mgm,method = 'bray')

#cmdscale function
bac.mgm.pc<-cmdscale(bac.mgm.dist,k=(nrow(bac.mgm)-1),eig = TRUE)
eig<-bac.mgm.pc$eig
bac.mgm.pc1<-(eig[1]*100/sum(eig))
bac.mgm.pc2<-(eig[2]*100/sum(eig))

#subset environmental data table
env.sub <- env.amp1[, c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe", "AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich")]; 
colnames(env.sub) <-  c("Latitude","Longitude","Altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe", "AK","C_N","C_P", "N_P", "Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness")

#envfit analysis
ef<-envfit(bac.mgm.pc,env.sub,permutations=999,na.rm=TRUE)
ef
## 
## ***VECTORS
## 
##                       Dim1     Dim2     r2 Pr(>r)    
## Latitude           0.42080  0.90715 0.5000  0.001 ***
## Longitude          0.31234  0.94997 0.1370  0.079 .  
## Altitude           0.23115  0.97292 0.1535  0.067 .  
## MAP               -0.67158 -0.74093 0.6670  0.001 ***
## MAT               -0.40495 -0.91434 0.4457  0.002 ** 
## TC                 0.30022  0.95387 0.1873  0.029 *  
## TN                 0.78687  0.61712 0.2139  0.024 *  
## TP                 0.87349  0.48684 0.6527  0.001 ***
## pH                 0.97217 -0.23426 0.8676  0.001 ***
## ACa                0.99977  0.02166 0.8189  0.001 ***
## AMg                0.97346 -0.22886 0.7548  0.001 ***
## AFe                0.85316 -0.52164 0.4543  0.001 ***
## AK                -0.95701  0.29007 0.1260  0.116    
## C_N               -0.39574  0.91836 0.2544  0.008 ** 
## C_P               -0.91325 -0.40740 0.5970  0.001 ***
## N_P               -0.83879 -0.54446 0.6023  0.001 ***
## Soil bulk density -0.98603 -0.16654 0.2178  0.021 *  
## Plant abundance   -0.48348 -0.87535 0.1636  0.048 *  
## Plant basal area  -0.34464  0.93873 0.3233  0.004 ** 
## Plant richness    -0.66758 -0.74453 0.4741  0.001 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
#extract scores and add color var
pcoa.scores.bact.mgm<-data.frame(bac.mgm.pc$points)[1:26]
pcoa.scores.bact.mgm$Site.latitude<-env.amp1$site.latitude
m1<-max(abs(pcoa.scores.bact.mgm$X1)); m2<-max(abs(pcoa.scores.bact.mgm$X2))


#extract envfit result
en_coord_cont.bact.mgm<-as.data.frame(scores(ef,"vectors"))*ordiArrowMul(ef)
en_coord_cont.bact.mgm
##                          Dim1        Dim2
## Latitude           0.24645096  0.53129711
## Longitude          0.09576549  0.29126990
## Altitude           0.07499738  0.31567302
## MAP               -0.45428401 -0.50119297
## MAT               -0.22390720 -0.50555672
## TC                 0.10762124  0.34194357
## TN                 0.30138685  0.23637169
## TP                 0.58448378  0.32576319
## pH                 0.75000000 -0.18072762
## ACa                0.74935727  0.01623147
## AMg                0.70047163 -0.16467795
## AFe                0.47629421 -0.29121659
## AK                -0.28135312  0.08527704
## C_N               -0.16533269  0.38367230
## C_P               -0.58443859 -0.26072127
## N_P               -0.53915354 -0.34996348
## Soil bulk density -0.38115403 -0.06437582
## Plant abundance   -0.16194919 -0.29321340
## Plant basal area  -0.16231546  0.44211469
## Plant richness    -0.38071526 -0.42459937
en_coord_cont.bact.mgm$envfactor<-rownames(en_coord_cont.bact.mgm)

p.bact.mgm = ggplot(data = pcoa.scores.bact.mgm, aes(x = X1, y = X2)) + 
  geom_point(size=5,mapping = aes(color=Site.latitude))+
  scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"))+
  geom_segment(aes(x = 0, y = 0, xend = Dim1*m1, yend = Dim2*m2),arrow = arrow(length = unit(0.02, "npc")),
               data = en_coord_cont.bact.mgm, size =0.6, colour = "grey")+
  geom_text_repel(data = en_coord_cont.bact.mgm, aes(x=Dim1*m1,y=Dim2*m2,label = envfactor),
                  colour="black",size=3)+
  ggtitle("Bacteria.Metagenome.Shotgun")+
  labs(x = sprintf("PCo1 (%.1f%%)", bac.mgm.pc1), y = sprintf("PCo2 (%.1f%%)", bac.mgm.pc2))+
  theme_bw()+theme(axis.text.x = element_text(size = 12,color = "black"),axis.title.x = element_text(size = 12,color = "black"),
                   axis.text.y = element_text(size = 12,color = "black",),axis.title.y = element_text(size = 12,color = "black"),
                   legend.title = element_text(size=12))
p.bact.mgm

#ggsave("pdf2/Fig.S5b.PcoA.bact.composition.2023.08.24.pdf", width = 6, height = 4)

#ko.mgm
set.seed(315)
ko.mgm.dist<-vegdist(ko.mgm,method = 'bray')

#cmdscale function
ko.mgm.pc<-cmdscale(ko.mgm.dist,k=(nrow(ko.mgm)-1),eig = TRUE)
eig<-ko.mgm.pc$eig
ko.mgm.pc1<-(eig[1]*100/sum(eig))
ko.mgm.pc2<-(eig[2]*100/sum(eig))

#subset environmental data table
env.sub <- env.amp1[, c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe", "AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich")]; 
colnames(env.sub) <-  c("Latitude","Longitude","Altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe", "AK","C_N","C_P", "N_P", "Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness")

#envfit analysis
ef<-envfit(ko.mgm.pc,env.sub,permutations=999,na.rm=TRUE)
ef
## 
## ***VECTORS
## 
##                       Dim1     Dim2     r2 Pr(>r)    
## Latitude           0.58576  0.81049 0.3834  0.001 ***
## Longitude          0.33285  0.94298 0.1808  0.038 *  
## Altitude           0.44348  0.89628 0.0827  0.222    
## MAP               -0.93189 -0.36274 0.5043  0.001 ***
## MAT               -0.57871 -0.81554 0.3213  0.004 ** 
## TC                 0.54793  0.83652 0.1222  0.104    
## TN                 0.87435 -0.48529 0.1904  0.028 *  
## TP                 0.89319 -0.44967 0.5693  0.001 ***
## pH                 0.87604 -0.48223 0.8325  0.001 ***
## ACa                0.99333  0.11529 0.8480  0.001 ***
## AMg                0.90581 -0.42369 0.7441  0.001 ***
## AFe                0.45755 -0.88919 0.6142  0.001 ***
## AK                -0.20527  0.97871 0.3309  0.003 ** 
## C_N               -0.15276  0.98826 0.4927  0.001 ***
## C_P               -0.86484  0.50206 0.6099  0.001 ***
## N_P               -0.98352  0.18080 0.5977  0.001 ***
## Soil bulk density -0.50795  0.86139 0.2628  0.007 ** 
## Plant abundance   -0.72252  0.69135 0.0527  0.399    
## Plant basal area  -0.84294 -0.53801 0.2144  0.020 *  
## Plant richness    -0.71641 -0.69768 0.4920  0.001 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
#extract scores and add color var
pcoa.scores.ko.mgm<-data.frame(ko.mgm.pc$points)[1:35]
pcoa.scores.ko.mgm$Site.latitude<-env.amp1$site.latitude
m1<-max(abs(pcoa.scores.ko.mgm$X1)); m2<-max(abs(pcoa.scores.ko.mgm$X2))


#extract envfit result
en_coord_cont.ko.mgm<-as.data.frame(scores(ef,"vectors"))*ordiArrowMul(ef)
en_coord_cont.ko.mgm
##                          Dim1       Dim2
## Latitude           0.29738774  0.4114841
## Longitude          0.11602764  0.3287106
## Altitude           0.10453889  0.2112753
## MAP               -0.54258098 -0.2112004
## MAT               -0.26895054 -0.3790165
## TC                 0.15702258  0.2397254
## TN                 0.31280018 -0.1736139
## TP                 0.55256466 -0.2781863
## pH                 0.65535896 -0.3607530
## ACa                0.75000000  0.0870472
## AMg                0.64067157 -0.2996765
## AFe                0.29401317 -0.5713778
## AK                -0.09680930  0.4615829
## C_N               -0.08791761  0.5687589
## C_P               -0.55378353  0.3214828
## N_P               -0.62346477  0.1146108
## Soil bulk density -0.21349078  0.3620435
## Plant abundance   -0.13594752  0.1300820
## Plant basal area  -0.31998559 -0.2042320
## Plant richness    -0.41199512 -0.4012266
en_coord_cont.ko.mgm$envfactor<-rownames(en_coord_cont.ko.mgm)

p.ko.mgm = ggplot(data = pcoa.scores.ko.mgm, aes(x = X1, y = X2)) + 
  geom_point(size=5,mapping = aes(color=Site.latitude))+
  scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"))+
  geom_segment(aes(x = 0, y = 0, xend = Dim1*m1, yend = Dim2*m2),arrow = arrow(length = unit(0.02, "npc")),
               data = en_coord_cont.ko.mgm, size =0.6, colour = "grey")+
  geom_text_repel(data = en_coord_cont.ko.mgm, aes(x=Dim1*m1,y=Dim2*m2,label = envfactor),
                  colour="black",size=3)+
  ggtitle("KO.Metagenome")+
  labs(x = sprintf("PCo1 (%.1f%%)", ko.mgm.pc1), y = sprintf("PCo2 (%.1f%%)", ko.mgm.pc2))+
  theme_bw()+theme(axis.text.x = element_text(size = 12,color = "black"),axis.title.x = element_text(size = 12,color = "black"),
                   axis.text.y = element_text(size = 12,color = "black",),axis.title.y = element_text(size = 12,color = "black"),
                   legend.title = element_text(size=12))
p.ko.mgm

#ggsave("pdf2/Fig.S5c.PcoA.KO.composition.2023.08.24.pdf", width = 6, height = 4)

#rgi.mgm
set.seed(315)
rgi.mgm.dist<-vegdist(rgi.mgm,method = 'bray')

#cmdscale function
rgi.mgm.pc<-cmdscale(rgi.mgm.dist,k=(nrow(rgi.mgm)-1),eig = TRUE)
eig<-rgi.mgm.pc$eig
rgi.mgm.pc1<-(eig[1]*100/sum(eig))
rgi.mgm.pc2<-(eig[2]*100/sum(eig))

#subset environmental data table
env.sub <- env.amp1[, c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe", "AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich")]; 
colnames(env.sub) <-  c("Latitude","Longitude","Altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe", "AK","C_N","C_P", "N_P", "Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness")

#envfit analysis
ef<-envfit(rgi.mgm.pc,env.sub,permutations=999,na.rm=TRUE)
ef
## 
## ***VECTORS
## 
##                       Dim1     Dim2     r2 Pr(>r)    
## Latitude           0.48603 -0.87394 0.4106  0.001 ***
## Longitude          0.69607 -0.71798 0.0854  0.219    
## Altitude           0.09595 -0.99539 0.3231  0.005 ** 
## MAP               -0.66337  0.74829 0.6141  0.001 ***
## MAT               -0.44612  0.89497 0.3772  0.002 ** 
## TC                 0.76116 -0.64856 0.1173  0.112    
## TN                 0.99999 -0.00390 0.2730  0.007 ** 
## TP                 0.94737 -0.32014 0.6591  0.001 ***
## pH                 0.84127 -0.54061 0.7804  0.001 ***
## ACa                0.78041 -0.62527 0.8867  0.001 ***
## AMg                0.92829 -0.37186 0.7052  0.001 ***
## AFe                0.95377  0.30055 0.4009  0.001 ***
## AK                -0.75505 -0.65567 0.1376  0.085 .  
## C_N               -0.49333 -0.86984 0.1969  0.032 *  
## C_P               -0.99442  0.10552 0.6067  0.001 ***
## N_P               -0.95648  0.29179 0.5950  0.001 ***
## Soil bulk density -0.64732  0.76222 0.1931  0.023 *  
## Plant abundance   -0.99126  0.13192 0.0668  0.329    
## Plant basal area  -0.76782 -0.64067 0.1976  0.031 *  
## Plant richness    -0.65745  0.75350 0.5007  0.001 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
#extract scores and add color var
pcoa.scores.rgi.mgm<-data.frame(rgi.mgm.pc$points)[1:31]
pcoa.scores.rgi.mgm$Site.latitude<-env.amp1$site.latitude
m1<-max(abs(pcoa.scores.rgi.mgm$X1)); m2<-max(abs(pcoa.scores.rgi.mgm$X2))

#extract envfit result
en_coord_cont.rgi.mgm<-as.data.frame(scores(ef,"vectors"))*ordiArrowMul(ef)
en_coord_cont.rgi.mgm
##                          Dim1         Dim2
## Latitude           0.29963680 -0.538781892
## Longitude          0.19568448 -0.201844240
## Altitude           0.05246991 -0.544347073
## MAP               -0.50017239  0.564203448
## MAT               -0.26360889  0.528836280
## TC                 0.25080804 -0.213704571
## TN                 0.50271028 -0.001960014
## TP                 0.73998653 -0.250059560
## pH                 0.71501770 -0.459482058
## ACa                0.70702555 -0.566471045
## AMg                0.75000000 -0.300441453
## AFe                0.58099901  0.183084892
## AK                -0.26946797 -0.233998181
## C_N               -0.21061936 -0.371363936
## C_P               -0.74522191  0.079078145
## N_P               -0.70985409  0.216555927
## Soil bulk density -0.27365158  0.322226473
## Plant abundance   -0.24650471  0.032804771
## Plant basal area  -0.32837179 -0.273992055
## Plant richness    -0.44758618  0.512972997
en_coord_cont.rgi.mgm$envfactor<-rownames(en_coord_cont.rgi.mgm)

p.rgi.mgm = ggplot(data = pcoa.scores.rgi.mgm, aes(x = X1, y = X2)) + 
  geom_point(size=5,mapping = aes(color=Site.latitude))+
  scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"))+
  geom_segment(aes(x = 0, y = 0, xend = Dim1*m1, yend = Dim2*m2),arrow = arrow(length = unit(0.02, "npc")),
               data = en_coord_cont.rgi.mgm, size =0.6, colour = "grey")+
  geom_text_repel(data = en_coord_cont.rgi.mgm, aes(x=Dim1*m1,y=Dim2*m2,label = envfactor),
                  colour="black",size=3)+
  ggtitle("ARG.Metagenome")+
  labs(x = sprintf("PCo1 (%.1f%%)", rgi.mgm.pc1), y = sprintf("PCo2 (%.1f%%)", rgi.mgm.pc2))+
  theme_bw()+theme(axis.text.x = element_text(size = 12,color = "black"),axis.title.x = element_text(size = 12,color = "black"),
                   axis.text.y = element_text(size = 12,color = "black",),axis.title.y = element_text(size = 12,color = "black"),
                   legend.title = element_text(size=12))
p.rgi.mgm

#ggsave("pdf2/Fig.S5d.PcoA.ARG.composition.2023.08.24.pdf", width = 6, height = 4)

#cazy.mgm
set.seed(315)
cazy.mgm.dist<-vegdist(cazy.mgm,method = 'bray')

#cmdscale function
cazy.mgm.pc<-cmdscale(cazy.mgm.dist,k=(nrow(cazy.mgm)-1),eig = TRUE)
eig<-cazy.mgm.pc$eig
cazy.mgm.pc1<-(eig[1]*100/sum(eig))
cazy.mgm.pc2<-(eig[2]*100/sum(eig))

#subset environmental data table
env.sub <- env.amp1[, c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe", "AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich")]; 
colnames(env.sub) <-  c("Latitude","Longitude","Altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe", "AK","C_N","C_P", "N_P", "Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness")

#envfit analysis
ef<-envfit(cazy.mgm.pc,env.sub,permutations=999,na.rm=TRUE)
ef
## 
## ***VECTORS
## 
##                       Dim1     Dim2     r2 Pr(>r)    
## Latitude          -0.72585  0.68786 0.3686  0.002 ** 
## Longitude         -0.43210  0.90182 0.1540  0.055 .  
## Altitude          -0.99984  0.01814 0.0825  0.250    
## MAP                0.61327 -0.78987 0.6373  0.001 ***
## MAT                0.82086 -0.57114 0.3090  0.008 ** 
## TC                -0.87596  0.48238 0.1324  0.084 .  
## TN                -0.98160  0.19094 0.2999  0.005 ** 
## TP                -0.96307  0.26925 0.6831  0.001 ***
## pH                -0.89688  0.44227 0.7826  0.001 ***
## ACa               -0.73360  0.67958 0.8802  0.001 ***
## AMg               -0.88603  0.46363 0.6596  0.001 ***
## AFe               -0.99401 -0.10928 0.4367  0.001 ***
## AK                 0.99703  0.07696 0.1005  0.181    
## C_N                0.94472  0.32788 0.0635  0.344    
## C_P                0.93341 -0.35880 0.5566  0.001 ***
## N_P                0.92667 -0.37588 0.5655  0.001 ***
## Soil bulk density  0.91417  0.40533 0.2858  0.005 ** 
## Plant abundance    0.53150 -0.84706 0.1000  0.176    
## Plant basal area   0.25323 -0.96741 0.1722  0.052 .  
## Plant richness     0.62642 -0.77948 0.5625  0.001 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
#extract scores and add color var
pcoa.scores.cazy.mgm<-data.frame(cazy.mgm.pc$points)[1:24]
pcoa.scores.cazy.mgm$Site.latitude<-env.amp1$site.latitude
m1<-max(abs(pcoa.scores.cazy.mgm$X1)); m2<-max(abs(pcoa.scores.cazy.mgm$X2))

#extract envfit result
en_coord_cont.cazy.mgm<-as.data.frame(scores(ef,"vectors"))*ordiArrowMul(ef)
en_coord_cont.cazy.mgm
##                         Dim1         Dim2
## Latitude          -0.4742840  0.449458516
## Longitude         -0.1825317  0.380955191
## Altitude          -0.3091767  0.005610774
## MAP                0.5269460 -0.678692841
## MAT                0.4910803 -0.341684028
## TC                -0.3430300  0.188900061
## TN                -0.5786084  0.112548294
## TP                -0.8566943  0.239512269
## pH                -0.8539685  0.421105139
## ACa               -0.7407636  0.686216101
## AMg               -0.7744690  0.405257277
## AFe               -0.7069852 -0.077727317
## AK                 0.3402142  0.026261368
## C_N                0.2562756  0.088943073
## C_P                0.7495152 -0.288109202
## N_P                0.7500000 -0.304220198
## Soil bulk density  0.5259784  0.233209587
## Plant abundance    0.1808565 -0.288234538
## Plant basal area   0.1131034 -0.432089601
## Plant richness     0.5056624 -0.629219503
en_coord_cont.cazy.mgm$envfactor<-rownames(en_coord_cont.cazy.mgm)

p.cazy.mgm = ggplot(data = pcoa.scores.cazy.mgm, aes(x = X1, y = X2)) + 
  geom_point(size=5,mapping = aes(color=Site.latitude))+
  scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"))+
  geom_segment(aes(x = 0, y = 0, xend = Dim1*m1, yend = Dim2*m2),arrow = arrow(length = unit(0.02, "npc")),
               data = en_coord_cont.cazy.mgm, size =0.6, colour = "grey")+
  geom_text_repel(data = en_coord_cont.cazy.mgm, aes(x=Dim1*m1,y=Dim2*m2,label = envfactor),
                  colour="black",size=3)+
  ggtitle("CAZy.Metagenome")+
  labs(x = sprintf("PCo1 (%.1f%%)", cazy.mgm.pc1), y = sprintf("PCo2 (%.1f%%)", cazy.mgm.pc2))+
  theme_bw()+theme(axis.text.x = element_text(size = 12,color = "black"),axis.title.x = element_text(size = 12,color = "black"),
                   axis.text.y = element_text(size = 12,color = "black",),axis.title.y = element_text(size = 12,color = "black"),
                   legend.title = element_text(size=12))
p.cazy.mgm

#ggsave("pdf2/Fig.S5e.PcoA.CAZy.composition.2023.08.24.pdf", width = 6, height = 4)
#Supplementary Fig. 2
library(readxl)
#genomesize of otu
genomesize_otu<-read_excel("data/otu_genome/calculate/genomesize_otu.xlsx")
genomesize_otu$sample_name == env.amp1$sample_name
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE
genomesize_otu<-cbind(genomesize_otu,env.amp1[,c(29,82)])

library(ggplot2)
library(colorRamps)

summary(lm(AGS_otu~pH,genomesize_otu))
## 
## Call:
## lm(formula = AGS_otu ~ pH, data = genomesize_otu)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.30434 -0.10159 -0.00056  0.09489  0.36679 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  5.46410    0.12243  44.632   <2e-16 ***
## pH          -0.06557    0.02440  -2.688   0.0111 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1493 on 34 degrees of freedom
## Multiple R-squared:  0.1752, Adjusted R-squared:  0.151 
## F-statistic: 7.223 on 1 and 34 DF,  p-value: 0.01106
shapiro.test(residuals(lm(AGS_otu~pH,genomesize_otu)))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(lm(AGS_otu ~ pH, genomesize_otu))
## W = 0.98375, p-value = 0.8632
p15<-ggplot() + geom_jitter(data=genomesize_otu,  height = 0, aes(x= pH, y=AGS_otu, color = site.latitude), alpha = 0.8, size =5) + 
  geom_smooth(data=genomesize_otu,aes(x= pH, y=AGS_otu), method ="lm", col= "white")+
  labs(x = "pH",y = "Average Genome Size (Mbp)")+
  theme_bw()+
  scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"),name="Site.latitude")+
  theme(legend.title = element_text(colour="black", size=12, face="bold"),
        legend.text = element_text(colour="black", size=12, face="bold"),
        axis.text=element_text(size=12,face="bold"),
        axis.title=element_text(size=12,face="bold"))+
  annotate("text",x=6.5,y=5.5, label="R2 = 0.151\nP = 0.01106",
           size=5,color="black")+ggtitle("Based on GTDB Annotation")
p15
## `geom_smooth()` using formula = 'y ~ x'

summary(lm(Protein_count~AGS_otu,genomesize_otu))
## 
## Call:
## lm(formula = Protein_count ~ AGS_otu, data = genomesize_otu)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -43.949 -13.864   3.517  12.874  55.741 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   432.18     117.35   3.683 0.000795 ***
## AGS_otu       839.09      22.81  36.784  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 21.87 on 34 degrees of freedom
## Multiple R-squared:  0.9755, Adjusted R-squared:  0.9748 
## F-statistic:  1353 on 1 and 34 DF,  p-value: < 2.2e-16
shapiro.test(residuals(lm(Protein_count~AGS_otu,genomesize_otu)))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(lm(Protein_count ~ AGS_otu, genomesize_otu))
## W = 0.97453, p-value = 0.5615
p17<-ggplot() + geom_jitter(data=genomesize_otu,  height = 0, aes(x= AGS_otu, y=Protein_count, color = site.latitude), alpha = 0.8, size =5) + 
  geom_smooth(data=genomesize_otu,aes(x= AGS_otu, y=Protein_count), method ="lm", col= "white")+
  labs(x = "Average Genome Size (Mbp)",y = "Protein Count")+
  theme_bw()+
  scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"),name="Site.latitude")+
  theme(legend.title = element_text(colour="black", size=12, face="bold"),
        legend.text = element_text(colour="black", size=12, face="bold"),
        axis.text=element_text(size=12,face="bold"),
        axis.title=element_text(size=12,face="bold"))+
  annotate("text",x=5,y=5000, label="R2 = 0.975\nP < 2.2e-16",
           size=5,color="black")+ggtitle("Based on GTDB Annotation")
p17
## `geom_smooth()` using formula = 'y ~ x'

#Supplementary Fig. 2
library(ggpubr)
## 
## 载入程辑包:'ggpubr'
## The following object is masked from 'package:ape':
## 
##     rotate
ggarrange(p15,p17,ncol = 2,nrow = 1,
          labels = c("a","b"),common.legend = TRUE,legend = "right")
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

#ggsave("pdf/Fig.S3_genometraits_otu.pdf",plot = last_plot(),units = "in",height = 3.5,width = 8,dpi = 600)
#Supplementary Fig.3
data1<-data.frame(t(bac.amp))
ID<-data.frame(bac.amp.ID1)
ID$OTU.ID<-paste(ID$OTU,ID$phylum,sep = ".")
env.amp1$pH2<-paste("pH",round(env.amp1$pH,digits = 2),env.amp1$sample_name,sep = ".")

opf<-"genus"
Flev<-ID[,opf] 
fung.lev<-data.frame(aggregate(data1,by=list(Flev) , sum))
rownames(fung.lev)<-fung.lev[,1]; fung.lev<-fung.lev[,-1]
data2<-data.frame(t(fung.lev))
  
total<-apply(data2, 1, sum); 
fung.relabu<-data.frame(lapply(data2, function(x) {  x / total  }) )  # change the abundance data into relative abundance#
  
fung.L<-fung.relabu
env.L<-env.amp1
fung.L <- fung.L[,order(-colSums(fung.L))] ## 
bac.genus<-cbind(env.L,fung.L)

colnames(bac.genus)
##   [1] "sample_name"                     "otu_id"                         
##   [3] "chenl_sample"                    "site"                           
##   [5] "location"                        "plot_NO"                        
##   [7] "sample_date"                     "site_NO"                        
##   [9] "forest_type"                     "Region"                         
##  [11] "MAT"                             "MAP"                            
##  [13] "latitude"                        "longitude"                      
##  [15] "altitude"                        "Coordinate.X"                   
##  [17] "Coordinate.Y"                    "TN"                             
##  [19] "TC"                              "ACa"                            
##  [21] "AFe"                             "AK"                             
##  [23] "AMg"                             "TP"                             
##  [25] "C_N"                             "C_P"                            
##  [27] "N_P"                             "soil_D"                         
##  [29] "pH"                              "Soil_PC1"                       
##  [31] "Soil_PC2"                        "Soil_PC3"                       
##  [33] "Soil_PC4"                        "Soil_PC5"                       
##  [35] "Soil_PC6"                        "plant_community_order"          
##  [37] "AllP_abu"                        "AMP_abu"                        
##  [39] "NonAMP_abu"                      "AllP_rich"                      
##  [41] "AMP_rich"                        "NonAMP_rich"                    
##  [43] "AllP_genus_rich"                 "AMP_genus_rich"                 
##  [45] "NonAMP_genus_rich"               "AllP_family_rich"               
##  [47] "AMP_family_rich"                 "NonAMP_family_rich"             
##  [49] "AllP_bsa"                        "AMP_bsa"                        
##  [51] "NonAMP_bsa"                      "AllP.PD"                        
##  [53] "AllP.mpd"                        "AllP.NRI"                       
##  [55] "AllP.mntd"                       "AllP.NTI"                       
##  [57] "AMP.PD"                          "AMP.mpd.obs"                    
##  [59] "AMP.NRI"                         "AMP.mntd.obs"                   
##  [61] "AMP.NTI"                         "NonAMP.PD"                      
##  [63] "NonAMP.mpd"                      "NonAMP.NRI"                     
##  [65] "NonAMP.mntd.obs"                 "NonAMP.NTI"                     
##  [67] "AllP.bi.mpd"                     "AllP.bi.NRI"                    
##  [69] "AllP.bi.mntd"                    "AllP.bi.NTI"                    
##  [71] "AMP.bi.mpd"                      "AMP.bi.NRI"                     
##  [73] "AMP.bi.mntd"                     "AMP.bi.NTI"                     
##  [75] "NonAMP.bi.mpd"                   "NonAMP.bi.NRI"                  
##  [77] "NonAMP.bi.mntd"                  "NonAMP.bi.NTI"                  
##  [79] "AMP.PSV"                         "NonAMP.PSV"                     
##  [81] "AllP.PSV"                        "site.latitude"                  
##  [83] "rgi.mgm.H"                       "cazy.mgm.H"                     
##  [85] "ko.mgm.H"                        "cog.mgm.H"                      
##  [87] "bac.mgm.H"                       "bac.amp.H"                      
##  [89] "rgi.mgm.S"                       "cazy.mgm.S"                     
##  [91] "ko.mgm.S"                        "cog.mgm.S"                      
##  [93] "bac.mgm.S"                       "bac.amp.S"                      
##  [95] "cog.mgm.sum"                     "rgi.mgm.sum"                    
##  [97] "cazy.mgm.sum"                    "ko.mgm.sum"                     
##  [99] "bac.mgm.sum"                     "pH2"                            
## [101] "X_"                              "Rhodoplanes"                    
## [103] "DA101"                           "Bradyrhizobium"                 
## [105] "Candidatus_Solibacter"           "Candidatus_Xiphinematobacter"   
## [107] "Candidatus_Koribacter"           "Burkholderia"                   
## [109] "Mycobacterium"                   "Salinispora"                    
## [111] "Methylosinus"                    "Candidatus_Rhabdochlamydia"     
## [113] "Nitrospira"                      "Steroidobacter"                 
## [115] "Pedomicrobium"                   "Pedosphaera"                    
## [117] "Planctomyces"                    "Kaistobacter"                   
## [119] "Gemmata"                         "Chthoniobacter"                 
## [121] "Methylibium"                     "Kitasatospora"                  
## [123] "Phenylobacterium"                "Bacillus"                       
## [125] "Flavobacterium"                  "Pseudomo_s"                     
## [127] "Bdellovibrio"                    "A17"                            
## [129] "Pseudonocardia"                  "Opitutus"                       
## [131] "Mesorhizobium"                   "Hyphomicrobium"                 
## [133] "Solirubrobacter"                 "Ramlibacter"                    
## [135] "Aquicella"                       "Fimbriimo_s"                    
## [137] "Pirellula"                       "Pilimelia"                      
## [139] "Sphingomo_s"                     "Flavisolibacter"                
## [141] "Afifella"                        "Rhizobium"                      
## [143] "Salinibacterium"                 "Agrobacterium"                  
## [145] "Aetherobacter"                   "Couchioplanes"                  
## [147] "Dokdonella"                      "Rubrivivax"                     
## [149] "Humicoccus"                      "Granulicella"                   
## [151] "A_eromyxobacter"                 "Geobacter"                      
## [153] "Conexibacter"                    "FFCH10602"                      
## [155] "Paenibacillus"                   "Streptomyces"                   
## [157] "Bosea"                           "Chitinophaga"                   
## [159] "Telmatospirillum"                "Luteibacter"                    
## [161] "Janthinobacterium"               "Devosia"                        
## [163] "Acidocella"                      "Asteroleplasma"                 
## [165] "Labrys"                          "Lysobacter"                     
## [167] "Dactylosporangium"               "Nocardioides"                   
## [169] "Thermomo_s"                      "Collimo_s"                      
## [171] "Catellatospora"                  "Chryseobacterium"               
## [173] "Mucilaginibacter"                "Actinoplanes"                   
## [175] "Acidicapsa"                      "Virgisporangium"                
## [177] "Kribbella"                       "Methylovirgula"                 
## [179] "Ellin506"                        "Candidatus_Protochlamydia"      
## [181] "Sediminibacterium"               "Averyella"                      
## [183] "Nevskia"                         "Pedobacter"                     
## [185] "Novosphingobium"                 "Serratia"                       
## [187] "OR.59"                           "Polaromo_s"                     
## [189] "JG37.AG.70"                      "Caulobacter"                    
## [191] "Microbacterium"                  "Streptosporangium"              
## [193] "Asticcacaulis"                   "Niastella"                      
## [195] "Agromyces"                       "Iamia"                          
## [197] "Balneimo_s"                      "Stenotrophomo_s"                
## [199] "Rubrobacter"                     "Silvimo_s"                      
## [201] "Singulisphaera"                  "Flavihumibacter"                
## [203] "Gemmatimo_s"                     "Luteolibacter"                  
## [205] "Diplazium"                       "Roseateles"                     
## [207] "Terriglobus"                     "Sphingobium"                    
## [209] "Lotus"                           "Cupriavidus"                    
## [211] "Cellulomo_s"                     "Actinoallomurus"                
## [213] "Nocardia"                        "Frankia"                        
## [215] "Micromonospora"                  "Parachlamydia"                  
## [217] "Comamo_s"                        "Cryptosporangium"               
## [219] "Amycolatopsis"                   "Clostridium"                    
## [221] "Legionella"                      "Pseudoxanthomo_s"               
## [223] "Segetibacter"                    "Herminiimo_s"                   
## [225] "Amaricoccus"                     "Arthrobacter"                   
## [227] "Methylobacterium"                "Plesiocystis"                   
## [229] "Perlucidibaca"                   "Skermanella"                    
## [231] "Mycopla_"                        "Sulcia"                         
## [233] "Acinetobacter"                   "Kibdelosporangium"              
## [235] "Sporocytophaga"                  "Solwaraspora"                   
## [237] "Rhodoferax"                      "Cytophaga"                      
## [239] "heteroC45_4W"                    "Myxococcus"                     
## [241] "Nitrosovibrio"                   "Treponema"                      
## [243] "Cohnella"                        "Fluviicola"                     
## [245] "Ktedonobacter"                   "Acidisoma"                      
## [247] "Modestobacter"                   "Spirochaeta"                    
## [249] "Adhaeribacter"                   "Dyadobacter"                    
## [251] "Sphingobacterium"                "Cellvibrio"                     
## [253] "Methylocella"                    "Candidatus_Amoebophilus"        
## [255] "Uliginosibacterium"              "Alicyclobacillus"               
## [257] "Rhodomicrobium"                  "X29.Apr"                        
## [259] "Rhodococcus"                     "Solitalea"                      
## [261] "Haliangium"                      "Loktanella"                     
## [263] "Ferruginibacter"                 "Chromobacterium"                
## [265] "Methylotenera"                   "Planotetraspora"                
## [267] "Ralstonia"                       "Chondromyces"                   
## [269] "Jiangella"                       "Leptothrix"                     
## [271] "Saccharopolyspora"               "Sphingopyxis"                   
## [273] "Cystobacter"                     "Neochlamydia"                   
## [275] "Hydrogenophaga"                  "Sporichthya"                    
## [277] "Elbe"                            "Prosthecobacter"                
## [279] "Dechloromo_s"                    "Methylocapsa"                   
## [281] "Phycicoccus"                     "Rickettsia"                     
## [283] "X_nnocystis"                     "Dok59"                          
## [285] "Rhodovastum"                     "Turneriella"                    
## [287] "Inquilinus"                      "Aeromicrobium"                  
## [289] "Dysgonomo_s"                     "Rickettsiella"                  
## [291] "Actinocorallia"                  "Pleomorphomo_s"                 
## [293] "Candidatus_Glomeribacter"        "Cryocola"                       
## [295] "Rhodobacter"                     "Carludovica"                    
## [297] "Nonomuraea"                      "Turicibacter"                   
## [299] "Actinomycetospora"               "Azospirillum"                   
## [301] "Geodermatophilus"                "Kaistia"                        
## [303] "Methyloversatilis"               "Polymorphospora"                
## [305] "Promicromonospora"               "Rhodopila"                      
## [307] "Sorangium"                       "Rathayibacter"                  
## [309] "Actinomadura"                    "Arthrospira"                    
## [311] "Haliscomenobacter"               "Arenimo_s"                      
## [313] "Estrella"                        "Simkania"                       
## [315] "Acanthamoeba"                    "Angiopteris"                    
## [317] "Bordetella"                      "Demequi_"                       
## [319] "Chthonomo_s"                     "Methylomo_s"                    
## [321] "Pythium"                         "Leptonema"                      
## [323] "Dictyostelium"                   "Geothrix"                       
## [325] "Hymenobacter"                    "Parvibaculum"                   
## [327] "Anno_"                           "Brevibacillus"                  
## [329] "Brownia"                         "Coprococcus"                    
## [331] "Coxiella"                        "Paludibacter"                   
## [333] "Roseomo_s"                       "Brenneria"                      
## [335] "Hydrocarboniphaga"               "Bryobacter"                     
## [337] "Congregibacter"                  "Emticicia"                      
## [339] "Lactococcus"                     "Roseococcus"                    
## [341] "Ammoniphilus"                    "Desulfovibrio"                  
## [343] "Filimo_s"                        "Kouleothrix"                    
## [345] "Nostocoida"                      "Solibacillus"                   
## [347] "Staphylococcus"                  "Caloramator"                    
## [349] "Crenothrix"                      "Crocinitomix"                   
## [351] "K82"                             "Kineosporia"                    
## [353] "Tatlockia"                       "Candidatus_Azobacteroides"      
## [355] "Propionivibrio"                  "Smaragdicoccus"                 
## [357] "Syntrophobacter"                 "YNPFFP6"                        
## [359] "Candidatus_Methylacidiphilum"    "Desulfobulbus"                  
## [361] "LP2A"                            "Methylopila"                    
## [363] "Rubricoccus"                     "Sphaerisporangium"              
## [365] "Tannerella"                      "A_erospora"                     
## [367] "Anomodon"                        "Arabidopsis"                    
## [369] "Caldilinea"                      "Candidatus_Entotheonella"       
## [371] "Candidatus_Portiera"             "Chlamydia"                      
## [373] "Desulfosporosinus"               "Elizabethkingia"                
## [375] "Elusimicrobium"                  "Enhydrobacter"                  
## [377] "Erwinia"                         "Fritschea"                      
## [379] "Leadbetterella"                  "Leptospira"                     
## [381] "Paracoccus"                      "Phaeospirillum"                 
## [383] "Shimazuella"                     "Wolbachia"                      
## [385] "Xenorhabdus"                     "Lacibacter"                     
## [387] "Longispora"                      "Azoarcus"                       
## [389] "Citricoccus"                     "Coccomyxa"                      
## [391] "Edaphobacter"                    "Euzebya"                        
## [393] "Nitrosospira"                    "Rubritalea"                     
## [395] "Spirosoma"                       "Sporomusa"                      
## [397] "Williamsia"                      "Akkermansia"                    
## [399] "Aurantimo_s"                     "Azolla"                         
## [401] "Bacteroides"                     "Brevundimo_s"                   
## [403] "Candidatus_Neoehrlichia"         "Chelatococcus"                  
## [405] "Corynebacterium"                 "Epulopiscium"                   
## [407] "Gallionella"                     "GOUTA19"                        
## [409] "Halomicronema"                   "Halothiobacillus"               
## [411] "Leifsonia"                       "Luteimo_s"                      
## [413] "Niabella"                        "Oscillochloris"                 
## [415] "Patulibacter"                    "Pelosinus"                      
## [417] "Procabacter"                     "Pseuda_bae_"                    
## [419] "Pullulanibacillus"               "Rhodocytophaga"                 
## [421] "Spermatozopsis"                  "Symbiobacterium"                
## [423] "Vogesella"                       "Xanthobacter"                   
## [425] "Acetobacter"                     "Actinoalloteichus"              
## [427] "Actinocatenispora"               "Actinopolymorpha"               
## [429] "Alistipes"                       "Andreprevotia"                  
## [431] "Aneurinibacillus"                "Aquimari_"                      
## [433] "Ardenscate_"                     "Azospira"                       
## [435] "Beijerinckia"                    "Blautia"                        
## [437] "Caldicoprobacter"                "Cocleimo_s"                     
## [439] "Coraliomargarita"                "Cryptoprodotis"                 
## [441] "Deefgea"                         "Dehalobacterium"                
## [443] "Desulfococcus"                   "Desulfomonile"                  
## [445] "Desulfotomaculum_Desulfovirgula" "Fibrobacteres.2"                
## [447] "Finegoldia"                      "Flexithrix"                     
## [449] "Gordonia"                        "Lactobacillus"                  
## [451] "Larkinella"                      "Leptotrichia"                   
## [453] "Litorilinea"                     "Longilinea"                     
## [455] "Lutibacterium"                   "Macrococcus"                    
## [457] "Magnetospirillum"                "Marinobacter"                   
## [459] "Microtetraspora"                 "Novispirillum"                  
## [461] "Oxalobacter"                     "Oxobacter"                      
## [463] "Phaeoceros"                      "Phytophthora"                   
## [465] "SC3.56"                          "Shinella"                       
## [467] "Sporosarci_"                     "Streptococcus"                  
## [469] "Thalassiosira"                   "Thauera"                        
## [471] "Verrucomicrobium"                "Waddlia"                        
## [473] "Wautersiella"                    "Weissella"                      
## [475] "A_erolinea"                      "A_erovorax"                     
## [477] "Acidiphilium"                    "Acidithiobacillus"              
## [479] "Actinomyces"                     "Acutodesmus"                    
## [481] "Aerococcus"                      "Allochromatium"                 
## [483] "Alloiococcus"                    "Alysiella"                      
## [485] "Ancylobacter"                    "Armatimo_s"                     
## [487] "Balneola"                        "Bartonella"                     
## [489] "Buchnera"                        "C39"                            
## [491] "Candidatus_Cardinium"            "Candidatus_Phytoplasma"         
## [493] "Chloronema"                      "Chroococcidiopsis"              
## [495] "Cyanophora"                      "Deinococcus"                    
## [497] "Dorea"                           "Ehrlichia"                      
## [499] "Enterococcus"                    "Exiguobacterium"                
## [501] "Flectobacillus"                  "Formivibrio"                    
## [503] "Haemophilus"                     "Marinobacterium"                
## [505] "Moorella"                        "Moraxella"                      
## [507] "MSBL3"                           "Mycoplasma"                     
## [509] "N09"                             "Nitrosomo_s"                    
## [511] "Oryza"                           "Oscillospira"                   
## [513] "Parabacteroides"                 "Peptoniphilus"                  
## [515] "Phaselicystis"                   "Photorhabdus"                   
## [517] "Planifilum"                      "Pleurozia"                      
## [519] "Polysphondylium"                 "Pontibacter"                    
## [521] "Propionicimo_s"                  "Psychrobacter"                  
## [523] "PW3"                             "Saprolegnia"                    
## [525] "Sarci_"                          "Schumannella"                   
## [527] "Tepidimicrobium"                 "Thioalkalivibrio"               
## [529] "Thiobacillus"                    "Thiorhodospira"                 
## [531] "Tindallia_Anoxy_tronum"          "TS34"                           
## [533] "Vibrio"                          "Xylanimicrobium"
bac.genus2<-bac.genus[,c(29,82,102:111)]

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ tibble  3.1.6      ✔ dplyr   1.0.10
## ✔ readr   2.1.3      ✔ stringr 1.4.1 
## ✔ purrr   0.3.5      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ psych::%+%()    masks ggplot2::%+%()
## ✖ psych::alpha()  masks ggplot2::alpha()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
bac.genus3<-gather(bac.genus2,key = "Genus",value = "Abundance",-"pH",-"site.latitude")
bac.genus3$Genus<-factor(bac.genus3$Genus,levels = c("Rhodoplanes","DA101","Bradyrhizobium",
                                                     "Candidatus_Solibacter","Candidatus_Xiphinematobacter",
                                                     "Candidatus_Koribacter","Burkholderia","Mycobacterium",
                                                     "Salinispora","Methylosinus"))

ggplot()+geom_point(data = bac.genus3,aes(x=pH,y=Abundance*100,color=site.latitude),size=3)+
  facet_wrap(.~Genus,nrow = 2,ncol = 5)+
  geom_smooth(data = bac.genus3,aes(x=pH,y=Abundance*100),method = "lm")+
  theme_bw()+ylab("Relative Abundance (%)")+
  scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"),name="Site.latitude")+
  theme(legend.title = element_text(colour="black", size=15, face="bold"),
        legend.text = element_text(colour="black", size=15, face="bold"),
        axis.text=element_text(size=15,face="bold"),
        axis.title=element_text(size=15,face="bold"),
        strip.text = element_text(size = 12))
## `geom_smooth()` using formula = 'y ~ x'

library(ggpubr)
#ggsave("otu_genome/Bacteria_top10genus2.pdf",width = 16,height = 7)

shapiro.test(residuals(lm(bac.genus2$Rhodoplanes~bac.genus2$pH)))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(lm(bac.genus2$Rhodoplanes ~ bac.genus2$pH))
## W = 0.96651, p-value = 0.338
summary(lm(bac.genus2$Rhodoplanes~bac.genus2$pH))
## 
## Call:
## lm(formula = bac.genus2$Rhodoplanes ~ bac.genus2$pH)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.022416 -0.008384 -0.000337  0.006603  0.037603 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    0.062568   0.010722   5.835 1.41e-06 ***
## bac.genus2$pH -0.002454   0.002137  -1.149    0.259    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.01308 on 34 degrees of freedom
## Multiple R-squared:  0.03735,    Adjusted R-squared:  0.009034 
## F-statistic: 1.319 on 1 and 34 DF,  p-value: 0.2588
shapiro.test(residuals(lm(log(bac.genus2$DA101)~bac.genus2$pH)))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(lm(log(bac.genus2$DA101) ~ bac.genus2$pH))
## W = 0.94418, p-value = 0.06861
summary(lm(log(bac.genus2$DA101)~bac.genus2$pH))
## 
## Call:
## lm(formula = log(bac.genus2$DA101) ~ bac.genus2$pH)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.38637 -0.81590  0.09182  0.91217  1.68340 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    -7.4393     0.9516  -7.818 4.24e-09 ***
## bac.genus2$pH   0.7550     0.1896   3.982 0.000341 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.161 on 34 degrees of freedom
## Multiple R-squared:  0.318,  Adjusted R-squared:  0.2979 
## F-statistic: 15.85 on 1 and 34 DF,  p-value: 0.0003414
shapiro.test(residuals(lm(log(bac.genus2$Bradyrhizobium)~bac.genus2$pH)))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(lm(log(bac.genus2$Bradyrhizobium) ~ bac.genus2$pH))
## W = 0.97584, p-value = 0.6046
summary(lm(log(bac.genus2$Bradyrhizobium)~bac.genus2$pH))
## 
## Call:
## lm(formula = log(bac.genus2$Bradyrhizobium) ~ bac.genus2$pH)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.00276 -0.40891  0.05477  0.35049  0.97719 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -4.07688    0.41983  -9.711 2.46e-11 ***
## bac.genus2$pH  0.02746    0.08366   0.328    0.745    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.512 on 34 degrees of freedom
## Multiple R-squared:  0.00316,    Adjusted R-squared:  -0.02616 
## F-statistic: 0.1078 on 1 and 34 DF,  p-value: 0.7447
shapiro.test(residuals(lm(log(bac.genus2$Candidatus_Solibacter)~bac.genus2$pH)))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(lm(log(bac.genus2$Candidatus_Solibacter) ~ bac.genus2$pH))
## W = 0.9722, p-value = 0.4888
summary(lm(log(bac.genus2$Candidatus_Solibacter)~bac.genus2$pH))
## 
## Call:
## lm(formula = log(bac.genus2$Candidatus_Solibacter) ~ bac.genus2$pH)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.86413 -0.28411 -0.00203  0.14321  0.91210 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -1.75191    0.33062  -5.299 7.03e-06 ***
## bac.genus2$pH -0.45849    0.06589  -6.959 5.04e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4032 on 34 degrees of freedom
## Multiple R-squared:  0.5875, Adjusted R-squared:  0.5754 
## F-statistic: 48.42 on 1 and 34 DF,  p-value: 5.043e-08
shapiro.test(residuals(lm(log(bac.genus2$Candidatus_Xiphinematobacter)~bac.genus2$pH)))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(lm(log(bac.genus2$Candidatus_Xiphinematobacter) ~ bac.genus2$pH))
## W = 0.96822, p-value = 0.379
summary(lm(log(bac.genus2$Candidatus_Xiphinematobacter)~bac.genus2$pH))
## 
## Call:
## lm(formula = log(bac.genus2$Candidatus_Xiphinematobacter) ~ bac.genus2$pH)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.41068 -0.55439 -0.00542  0.29822  1.34609 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    -2.1238     0.5773  -3.679 0.000804 ***
## bac.genus2$pH  -0.5415     0.1150  -4.707 4.11e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.704 on 34 degrees of freedom
## Multiple R-squared:  0.3945, Adjusted R-squared:  0.3767 
## F-statistic: 22.16 on 1 and 34 DF,  p-value: 4.105e-05
shapiro.test(residuals(lm(log(bac.genus2$Candidatus_Koribacter)~bac.genus2$pH)))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(lm(log(bac.genus2$Candidatus_Koribacter) ~ bac.genus2$pH))
## W = 0.98371, p-value = 0.862
summary(lm(log(bac.genus2$Candidatus_Koribacter)~bac.genus2$pH))
## 
## Call:
## lm(formula = log(bac.genus2$Candidatus_Koribacter) ~ bac.genus2$pH)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.59878 -0.37855 -0.01199  0.36615  1.31598 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    -1.3010     0.5092  -2.555   0.0153 *  
## bac.genus2$pH  -0.7152     0.1015  -7.048 3.88e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6211 on 34 degrees of freedom
## Multiple R-squared:  0.5937, Adjusted R-squared:  0.5817 
## F-statistic: 49.68 on 1 and 34 DF,  p-value: 3.881e-08
shapiro.test(residuals(lm(log(bac.genus2$Burkholderia)~bac.genus2$pH)))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(lm(log(bac.genus2$Burkholderia) ~ bac.genus2$pH))
## W = 0.97527, p-value = 0.5855
summary(lm(log(bac.genus2$Burkholderia)~bac.genus2$pH))
## 
## Call:
## lm(formula = log(bac.genus2$Burkholderia) ~ bac.genus2$pH)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.00953 -0.38673 -0.05603  0.42936  0.97480 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -3.34420    0.44239  -7.559 8.86e-09 ***
## bac.genus2$pH -0.33371    0.08816  -3.785 0.000596 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5396 on 34 degrees of freedom
## Multiple R-squared:  0.2965, Adjusted R-squared:  0.2758 
## F-statistic: 14.33 on 1 and 34 DF,  p-value: 0.0005964
shapiro.test(residuals(lm(log(bac.genus2$Mycobacterium)~bac.genus2$pH)))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(lm(log(bac.genus2$Mycobacterium) ~ bac.genus2$pH))
## W = 0.9797, p-value = 0.7353
summary(lm(log(bac.genus2$Mycobacterium)~bac.genus2$pH))
## 
## Call:
## lm(formula = log(bac.genus2$Mycobacterium) ~ bac.genus2$pH)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.3513 -0.4097 -0.0918  0.5553  1.2193 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    -5.7983     0.5455 -10.630 2.39e-12 ***
## bac.genus2$pH   0.1538     0.1087   1.415    0.166    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6653 on 34 degrees of freedom
## Multiple R-squared:  0.0556, Adjusted R-squared:  0.02783 
## F-statistic: 2.002 on 1 and 34 DF,  p-value: 0.1662
shapiro.test(residuals(lm(sqrt(bac.genus2$Salinispora)~bac.genus2$pH)))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(lm(sqrt(bac.genus2$Salinispora) ~ bac.genus2$pH))
## W = 0.98181, p-value = 0.8048
summary(lm(sqrt(bac.genus2$Salinispora)~bac.genus2$pH))
## 
## Call:
## lm(formula = sqrt(bac.genus2$Salinispora) ~ bac.genus2$pH)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.059806 -0.013819 -0.002034  0.019920  0.076001 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    0.139580   0.022245   6.275  3.8e-07 ***
## bac.genus2$pH -0.018008   0.004433  -4.062 0.000271 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02713 on 34 degrees of freedom
## Multiple R-squared:  0.3268, Adjusted R-squared:  0.307 
## F-statistic:  16.5 on 1 and 34 DF,  p-value: 0.0002708
shapiro.test(residuals(lm(log(bac.genus2$Methylosinus)~bac.genus2$pH)))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(lm(log(bac.genus2$Methylosinus) ~ bac.genus2$pH))
## W = 0.96348, p-value = 0.2747
summary(lm(log(bac.genus2$Methylosinus)~bac.genus2$pH))
## 
## Call:
## lm(formula = log(bac.genus2$Methylosinus) ~ bac.genus2$pH)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.8997 -0.9741  0.0638  1.1118  2.3008 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    -7.8265     1.2057  -6.491    2e-07 ***
## bac.genus2$pH   0.2623     0.2403   1.092    0.283    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.471 on 34 degrees of freedom
## Multiple R-squared:  0.03385,    Adjusted R-squared:  0.005438 
## F-statistic: 1.191 on 1 and 34 DF,  p-value: 0.2827
#Suplementary Fig.4
data.genomesize.bahram<-read.csv("data/bahram/data.genomesize4.csv")

p1<-ggplot()+geom_jitter(data = data.genomesize.bahram,aes(x=pH,y=AGS),size=5)+
  geom_smooth(data = data.genomesize.bahram,aes(x=pH,y=AGS),method = "lm")+
  xlab("pH")+ylab("Average Genome Size (Mbp)")+theme_bw()+
  theme(axis.text = element_text(size = 12,color = "black",face = "bold"),
        axis.title = element_text(size = 12,color = "black",face = "bold"))+
  annotate("text",x=7,y=6, label="rho = -0.43 ***",
           size=5,color="black")+ggtitle("Based on GTDB Annotation")

p1
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 8 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 8 rows containing missing values (`geom_point()`).

p6<-ggplot()+geom_jitter(data = data.genomesize.bahram,aes(x=pH,y=AGS2_mgm),size=5)+
  geom_smooth(data = data.genomesize.bahram,aes(x=pH,y=AGS2_mgm),method = "lm")+
  xlab("pH")+ylab("Average Genome Size (Mbp)")+theme_bw()+
  theme(axis.text = element_text(size = 12,color = "black",face = "bold"),
        axis.title = element_text(size = 12,color = "black",face = "bold"))+
  annotate("text",x=7,y=6, label="rho = -0.22 *",
           size=5,color="black")+ggtitle("Based on Metagenome")

p6
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 8 rows containing non-finite values (`stat_smooth()`).
## Removed 8 rows containing missing values (`geom_point()`).

#Supplementary Fig. 4
library(ggpubr)
ggarrange(p6,p1,ncol = 2,nrow = 1,
          labels = c("a","b"),common.legend = TRUE,legend = "right")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 8 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 8 rows containing missing values (`geom_point()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 8 rows containing non-finite values (`stat_smooth()`).
## Removed 8 rows containing missing values (`geom_point()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 8 rows containing non-finite values (`stat_smooth()`).
## Removed 8 rows containing missing values (`geom_point()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 8 rows containing non-finite values (`stat_smooth()`).
## Removed 8 rows containing missing values (`geom_point()`).

#ggsave("pdf/Fig.S4_genometraits_bahram.pdf",plot = last_plot(),units = "in",height = 3.5,width = 8,dpi = 600)
#Supplementary Fig. 5
summary(lm(env.genomeTraits$GC_percent~env.genomeTraits$AGS))
## 
## Call:
## lm(formula = env.genomeTraits$GC_percent ~ env.genomeTraits$AGS)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.3771 -0.7417  0.1210  0.5994  3.0543 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           65.4820     0.7923  82.652  < 2e-16 ***
## env.genomeTraits$AGS  -0.8438     0.1815  -4.649 4.88e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.161 on 34 degrees of freedom
## Multiple R-squared:  0.3886, Adjusted R-squared:  0.3706 
## F-statistic: 21.61 on 1 and 34 DF,  p-value: 4.88e-05
shapiro.test(residuals(lm(env.genomeTraits$GC_percent~env.genomeTraits$AGS)))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(lm(env.genomeTraits$GC_percent ~ env.genomeTraits$AGS))
## W = 0.98259, p-value = 0.8289
p12.1<-ggplot() + geom_jitter(data=env.genomeTraits,  height = 0, aes(x= AGS, y=GC_percent, color = site.latitude), alpha = 0.8, size =5) + 
  geom_smooth(data=env.genomeTraits,aes(x= AGS, y=GC_percent), method ="lm", col= "white")+
  labs(x = "Average Genome Size (Mbp)",y = "GC Content (%)")+
  theme_bw()+
  scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"),name="Site.latitude")+
  theme(legend.title = element_text(colour="black", size=12, face="bold"),
        legend.text = element_text(colour="black", size=12, face="bold"),
        axis.text=element_text(size=12,face="bold"),
        axis.title=element_text(size=12,face="bold"))+
  annotate("text",x=3,y=59, label="R2 = 0.371\nP = 4.88e-05",
           size=5,color="black")+ggtitle("Based on Metagenome")
p12.1
## `geom_smooth()` using formula = 'y ~ x'

summary(lm(GC_otu~AGS_otu,genomesize_otu))
## 
## Call:
## lm(formula = GC_otu ~ AGS_otu, data = genomesize_otu)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0164537 -0.0058825  0.0000878  0.0043699  0.0143172 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.558234   0.041071  13.592 2.65e-15 ***
## AGS_otu     0.010524   0.007984   1.318    0.196    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.007654 on 34 degrees of freedom
## Multiple R-squared:  0.04863,    Adjusted R-squared:  0.02064 
## F-statistic: 1.738 on 1 and 34 DF,  p-value: 0.1962
shapiro.test(residuals(lm(GC_otu~AGS_otu,genomesize_otu)))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(lm(GC_otu ~ AGS_otu, genomesize_otu))
## W = 0.97653, p-value = 0.6276
p19<-ggplot() + geom_jitter(data=genomesize_otu,  height = 0, aes(x= AGS_otu, y=GC_otu*100, color = site.latitude), alpha = 0.8, size =5) + 
  #geom_smooth(data=genomesize_otu,aes(x= AGS_otu, y=GC_otu*100), method ="lm", col= "white")+
  labs(x = "Average Genome Size (Mbp)",y = "GC Percent (%)")+
  theme_bw()+
  scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"),name="Site.latitude")+
  theme(legend.title = element_text(colour="black", size=12, face="bold"),
        legend.text = element_text(colour="black", size=12, face="bold"),
        axis.text=element_text(size=12,face="bold"),
        axis.title=element_text(size=12,face="bold"))+
  annotate("text",x=5.4,y=60, label="R2 = 0.021\nP = 0.1962",size=5,color="black")+
  ggtitle("Based on GTDB Annotation")
p19

#Supplementary Fig. 5
library(ggpubr)
ggarrange(p12.1,p19,ncol = 2,nrow = 1,
          labels = c("a","b"),common.legend = TRUE,legend = "right")
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

#ggsave("pdf/Fig.S5-1_genometraits_bahram.pdf",plot = last_plot(),units = "in",height = 3.5,width = 9,dpi = 600)
#Supplementary Fig. 6

summary(lm(gc_percent~AGS,data = data.genomesize.bahram))
## 
## Call:
## lm(formula = gc_percent ~ AGS, data = data.genomesize.bahram)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0204301 -0.0041983 -0.0000698  0.0039563  0.0223992 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.590390   0.010844  54.444  < 2e-16 ***
## AGS         0.007965   0.001993   3.997 0.000103 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.007065 on 140 degrees of freedom
## Multiple R-squared:  0.1024, Adjusted R-squared:  0.09601 
## F-statistic: 15.97 on 1 and 140 DF,  p-value: 0.0001034
shapiro.test(residuals(lm(gc_percent~AGS,data = data.genomesize.bahram)))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(lm(gc_percent ~ AGS, data = data.genomesize.bahram))
## W = 0.98831, p-value = 0.2776
p10<-ggplot()+geom_jitter(data = data.genomesize.bahram,aes(x=AGS2_mgm,y=GC_percent_mgm),size=5)+
  geom_smooth(data = data.genomesize.bahram,aes(x=AGS2_mgm,y=GC_percent_mgm),method = "lm")+
  ylab("GC Percent (%)")+xlab("Average Genome Size (Mbp)")+theme_bw()+
  theme(axis.text = element_text(size = 12,color = "black",face = "bold"),
        axis.title = element_text(size = 12,color = "black",face = "bold"))+
  annotate("text",x=5,y=67, label="rho = -0.57 ***",
           size=5,color="black")+ggtitle("Based on Metagenome")

p10
## `geom_smooth()` using formula = 'y ~ x'

summary(lm(gc_percent~AGS,data = data.genomesize.bahram))
## 
## Call:
## lm(formula = gc_percent ~ AGS, data = data.genomesize.bahram)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0204301 -0.0041983 -0.0000698  0.0039563  0.0223992 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.590390   0.010844  54.444  < 2e-16 ***
## AGS         0.007965   0.001993   3.997 0.000103 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.007065 on 140 degrees of freedom
## Multiple R-squared:  0.1024, Adjusted R-squared:  0.09601 
## F-statistic: 15.97 on 1 and 140 DF,  p-value: 0.0001034
shapiro.test(residuals(lm(gc_percent~AGS,data = data.genomesize.bahram)))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(lm(gc_percent ~ AGS, data = data.genomesize.bahram))
## W = 0.98831, p-value = 0.2776
p5<-ggplot()+geom_jitter(data = data.genomesize.bahram,aes(x=AGS,y=gc_percent*100),size=5)+
  geom_smooth(data = data.genomesize.bahram,aes(x=AGS,y=gc_percent*100),method = "lm")+
  ylab("GC Percent (%)")+xlab("Average Genome Size (Mbp)")+theme_bw()+
  theme(axis.text = element_text(size = 12,color = "black",face = "bold"),
        axis.title = element_text(size = 12,color = "black",face = "bold"))+
  annotate("text",x=5,y=65, label="rho = 0.25 **",
           size=5,color="black")+ggtitle("Based on GTDB Annotation")

p5
## `geom_smooth()` using formula = 'y ~ x'

#Supplementary Fig. 6
library(ggpubr)
ggarrange(p10,p5,ncol = 2,nrow = 1,
          labels = c("a","b"),common.legend = TRUE,legend = "right")
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

#ggsave("pdf/Fig.S5-2_genometraits_bahram.pdf",plot = last_plot(),units = "in",height = 3.5,width = 8,dpi = 600)
#Supplementary FIg.7
p1<-ggplot() + geom_jitter(data=env.amp1,  height = 0, aes(x= pH, y=bac.amp.H, color = site.latitude), alpha = 0.8, size =5) + 
  geom_smooth(data=env.amp1,aes(x= pH, y=bac.amp.H), method ="lm", col= "white")+
  labs(x = "Soil pH",y = "H'.16S")+
  theme_bw()+
  scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"))+
  theme(legend.title = element_text(colour="black", size=15, face="bold"),
        legend.text = element_text(colour="black", size=15, face="bold"),
        axis.text=element_text(size=15,face="bold"),
        axis.title=element_text(size=15,face="bold"))+
  annotate("text",x=6.5,y=6.3, label="R2 = 0.481\nP = 1.653e-06",
           size=5,color="black")
p1

#ggsave("pdf/Fig.1a.corr.pH.H16S.2023.04.18.pdf", width = 4, height = 3)
summary(lm(env.amp1$bac.amp.H~env.amp1$pH))
## 
## Call:
## lm(formula = env.amp1$bac.amp.H ~ env.amp1$pH)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.40585 -0.16282 -0.04642  0.14106  0.50752 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  5.56163    0.18483  30.090  < 2e-16 ***
## env.amp1$pH  0.21297    0.03683   5.782 1.65e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2254 on 34 degrees of freedom
## Multiple R-squared:  0.4958, Adjusted R-squared:  0.481 
## F-statistic: 33.43 on 1 and 34 DF,  p-value: 1.653e-06
shapiro.test(residuals(lm(env.amp1$bac.amp.H~env.amp1$pH)))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(lm(env.amp1$bac.amp.H ~ env.amp1$pH))
## W = 0.97818, p-value = 0.6835
p3<-ggplot() + geom_jitter(data=env.amp1,  height = 0, aes(x= pH, y=ko.mgm.H, color = site.latitude), alpha = 0.8, size =5) + 
  geom_smooth(data=env.amp1,aes(x= pH, y=ko.mgm.H), col= "white")+
  labs(x = "Soil pH",y = "H'.KO")+
  theme_bw()+
  scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"))+
  theme(legend.title = element_text(colour="black", size=15, face="bold"),
        legend.text = element_text(colour="black", size=15, face="bold"),
        axis.text=element_text(size=15,face="bold"),
        axis.title=element_text(size=15,face="bold"))+
  annotate("text",x=6.5,y=7.9, label="R2 = 0.199\nP =  0.009",
           size=5,color="black")
p3

#ggsave("pdf/Fig.1c.corr.pH.HKO.2022.12.29.pdf", width = 4, height = 3)
m2<-lm(env.amp1$ko.mgm.H ~ env.amp1$pH +  I(env.amp1$pH^2)  )
AIC(m2)
## [1] -158.4754
summary(m2)
## 
## Call:
## lm(formula = env.amp1$ko.mgm.H ~ env.amp1$pH + I(env.amp1$pH^2))
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.055544 -0.016888  0.003081  0.019042  0.055272 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       8.104670   0.117085  69.221   <2e-16 ***
## env.amp1$pH      -0.087095   0.045769  -1.903   0.0658 .  
## I(env.amp1$pH^2)  0.007155   0.004317   1.657   0.1069    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02503 on 33 degrees of freedom
## Multiple R-squared:  0.245,  Adjusted R-squared:  0.1992 
## F-statistic: 5.354 on 2 and 33 DF,  p-value: 0.009689
shapiro.test(residuals(m2))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(m2)
## W = 0.95967, p-value = 0.21
p5<-ggplot() + geom_jitter(data=env.amp1,  height = 0, aes(x= pH, y = rgi.mgm.H, color = site.latitude), alpha = 0.8, size =5) + 
  geom_smooth(data=env.amp1,aes(x= pH, y=rgi.mgm.H), method = "lm", color = "white")+
  labs(x = "Soil pH",y = "H'.ARG")+
  theme_bw()+
  scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"))+
  theme(legend.title = element_text(colour="black", size=15, face="bold"),
        legend.text = element_text(colour="black", size=15, face="bold"),
        axis.text=element_text(size=15,face="bold"),
        axis.title=element_text(size=15,face="bold"))+
  annotate("text",x=6.5,y=4.9, label="R2 = 0.190\nP = 0.004",
           size=5,color="black")
p5

summary(lm(env.amp1$rgi.mgm.H~env.amp1$pH))
## 
## Call:
## lm(formula = env.amp1$rgi.mgm.H ~ env.amp1$pH)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.040812 -0.012858  0.000664  0.014349  0.049838 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.917865   0.017772 276.712  < 2e-16 ***
## env.amp1$pH -0.010755   0.003542  -3.037  0.00457 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02168 on 34 degrees of freedom
## Multiple R-squared:  0.2133, Adjusted R-squared:  0.1902 
## F-statistic: 9.221 on 1 and 34 DF,  p-value: 0.004571
shapiro.test(residuals(lm(env.amp1$rgi.mgm.H~env.amp1$pH)))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(lm(env.amp1$rgi.mgm.H ~ env.amp1$pH))
## W = 0.98332, p-value = 0.8506
#ggsave("pdf2/Fig.2b.corr.pH.HARG.2022.12.29.pdf", width = 4, height = 3)

p7<-ggplot() + geom_jitter(data=env.amp1,  height = 0, aes(x= pH, y = cazy.mgm.H, color = site.latitude), alpha = 0.8, size =5) + 
  geom_smooth(data=env.amp1,aes(x= pH, y=cazy.mgm.H), method = "lm", color = "white")+
  labs(x = "Soil pH",y = "H'.Cazy")+
  theme_bw()+
  scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"))+
  theme(legend.title = element_text(colour="black", size=15, face="bold"),
        legend.text = element_text(colour="black", size=15, face="bold"),
        axis.text=element_text(size=15,face="bold"),
        axis.title=element_text(size=15,face="bold"))+
  annotate("text",x=6.5,y=3.55, label="R2 = 0.833\nP = 5.52e-15",
           size=5,color="black")
p7

summary(lm(env.amp1$cazy.mgm.H~env.amp1$pH))
## 
## Call:
## lm(formula = env.amp1$cazy.mgm.H ~ env.amp1$pH)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.033579 -0.011907 -0.002831  0.011534  0.057275 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.729231   0.017529  212.74  < 2e-16 ***
## env.amp1$pH -0.046286   0.003493  -13.25 5.52e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02138 on 34 degrees of freedom
## Multiple R-squared:  0.8378, Adjusted R-squared:  0.833 
## F-statistic: 175.6 on 1 and 34 DF,  p-value: 5.52e-15
shapiro.test(residuals(lm(env.amp1$cazy.mgm.H~env.amp1$pH)))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(lm(env.amp1$cazy.mgm.H ~ env.amp1$pH))
## W = 0.97247, p-value = 0.4966
#ggsave("pdf2/Fig.2c.corr.pH.HCazy.2022.12.29.pdf", width = 4, height = 3)

p1.1<-ggplot() + geom_jitter(data=env.amp1,  height = 0, aes(x= round(ko.mgm.H,digits = 2), y=bac.amp.H, color = site.latitude), alpha = 0.8, size =5) + 
  geom_smooth(data=env.amp1,aes(x= ko.mgm.H, y=bac.amp.H), method ="lm", col= "white")+
  labs(x = "H'.KO",y = "H'.16S")+
  theme_bw()+
  scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"))+
  theme(legend.title = element_text(colour="black", size=15, face="bold"),
        legend.text = element_text(colour="black", size=15, face="bold"),
        axis.text=element_text(size=15,face="bold"),
        axis.title=element_text(size=15,face="bold"))+
  annotate("text",x=7.87,y=7.2, label="R2 = 0.294\nP = 0.00037",
           size=5,color="black")
p1.1

#ggsave("pdf/Fig.1b.corr.pH.H16S.2023.04.18.pdf", width = 4, height = 3)
summary(lm(env.amp1$bac.amp.H~env.amp1$ko.mgm.H))
## 
## Call:
## lm(formula = env.amp1$bac.amp.H ~ env.amp1$ko.mgm.H)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.48005 -0.22545  0.00627  0.19960  0.63453 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         55.902     12.478    4.48 8.03e-05 ***
## env.amp1$ko.mgm.H   -6.274      1.588   -3.95 0.000373 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2628 on 34 degrees of freedom
## Multiple R-squared:  0.3146, Adjusted R-squared:  0.2944 
## F-statistic: 15.61 on 1 and 34 DF,  p-value: 0.0003731
shapiro.test(residuals(lm(env.amp1$bac.amp.H~env.amp1$ko.mgm.H)))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(lm(env.amp1$bac.amp.H ~ env.amp1$ko.mgm.H))
## W = 0.97175, p-value = 0.4752
p3.1<-ggplot() + geom_jitter(data=env.genomeTraits,  height = 0, aes(x= AGS, y=round(ko.mgm.H,digits = 2), color = site.latitude), alpha = 0.8, size =5) + 
  geom_smooth(data=env.genomeTraits,aes(x= AGS, y=ko.mgm.H), method = "lm",col= "white")+
  labs(x = "Average Genome Size (Mbp)",y = "H'.KO")+
  theme_bw()+
  scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"))+
  theme(legend.title = element_text(colour="black", size=15, face="bold"),
        legend.text = element_text(colour="black", size=15, face="bold"),
        axis.text=element_text(size=15,face="bold"),
        axis.title=element_text(size=15,face="bold"))+
  annotate("text",x=6,y=7.89, label="R2 = 0.0006\nP = 0.886",
           size=5,color="black")

p3.1

#ggsave("pdf2/Fig.2a.corr.pH.HKO.2022.12.29.pdf", width = 4, height = 3)
summary(lm(env.genomeTraits$ko.mgm.H~env.genomeTraits$AGS))
## 
## Call:
## lm(formula = env.genomeTraits$ko.mgm.H ~ env.genomeTraits$AGS)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.057973 -0.019978 -0.003867  0.020544  0.049300 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           7.8596291  0.0193594 405.986   <2e-16 ***
## env.genomeTraits$AGS -0.0006406  0.0044351  -0.144    0.886    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02837 on 34 degrees of freedom
## Multiple R-squared:  0.0006132,  Adjusted R-squared:  -0.02878 
## F-statistic: 0.02086 on 1 and 34 DF,  p-value: 0.886
shapiro.test(residuals(lm(env.genomeTraits$ko.mgm.H~env.genomeTraits$AGS)))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(lm(env.genomeTraits$ko.mgm.H ~ env.genomeTraits$AGS))
## W = 0.96903, p-value = 0.3996
p5.1<-ggplot() + geom_jitter(data=env.genomeTraits,  height = 0, aes(x= AGS, y=round(rgi.mgm.H,digits = 2), color = site.latitude), alpha = 0.8, size =5) + 
  geom_smooth(data=env.genomeTraits,aes(x= AGS, y=rgi.mgm.H), method = "lm",col= "white")+
  labs(x = "Average Genome Size (Mbp)",y = "H'.ARG")+
  theme_bw()+
  scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"))+
  theme(legend.title = element_text(colour="black", size=15, face="bold"),
        legend.text = element_text(colour="black", size=15, face="bold"),
        axis.text=element_text(size=15,face="bold"),
        axis.title=element_text(size=15,face="bold"))+
  annotate("text",x=6,y=4.9, label="R2 = 0.239\nP = 0.001",
           size=5,color="black")
p5.1

#ggsave("pdf2/Fig.2a.corr.pH.HKO.2022.12.29.pdf", width = 4, height = 3)
summary(lm(env.genomeTraits$rgi.mgm.H~env.genomeTraits$AGS))
## 
## Call:
## lm(formula = env.genomeTraits$rgi.mgm.H ~ env.genomeTraits$AGS)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.050940 -0.013002  0.003482  0.014833  0.037889 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          4.816838   0.014333 336.063  < 2e-16 ***
## env.genomeTraits$AGS 0.011384   0.003284   3.467  0.00145 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02101 on 34 degrees of freedom
## Multiple R-squared:  0.2612, Adjusted R-squared:  0.2395 
## F-statistic: 12.02 on 1 and 34 DF,  p-value: 0.001447
shapiro.test(residuals(lm(env.genomeTraits$rgi.mgm.H~env.genomeTraits$AGS)))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(lm(env.genomeTraits$rgi.mgm.H ~ env.genomeTraits$AGS))
## W = 0.96708, p-value = 0.3513
p7.1<-ggplot() + geom_jitter(data=env.genomeTraits,  height = 0, aes(x= AGS, y=cazy.mgm.H, color = site.latitude), alpha = 0.8, size =5) + 
  geom_smooth(data=env.genomeTraits,aes(x= AGS, y=cazy.mgm.H), method = "lm",col= "white")+
  labs(x = "Average Genome Size (Mbp)",y = "H'.Cazy")+
  theme_bw()+
  scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"))+
  theme(legend.title = element_text(colour="black", size=15, face="bold"),
        legend.text = element_text(colour="black", size=15, face="bold"),
        axis.text=element_text(size=15,face="bold"),
        axis.title=element_text(size=15,face="bold"))+
  annotate("text",x=6,y=3.6, label="R2 = 0.322\nP = 0.0001",
           size=5,color="black")
p7.1

#ggsave("pdf2/Fig.2a.corr.pH.HKO.2022.12.29.pdf", width = 4, height = 3)
summary(lm(env.genomeTraits$cazy.mgm.H~env.genomeTraits$AGS))
## 
## Call:
## lm(formula = env.genomeTraits$cazy.mgm.H ~ env.genomeTraits$AGS)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.076645 -0.025371  0.006949  0.026161  0.081067 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          3.382131   0.029386   115.1  < 2e-16 ***
## env.genomeTraits$AGS 0.028276   0.006732     4.2 0.000182 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.04307 on 34 degrees of freedom
## Multiple R-squared:  0.3416, Adjusted R-squared:  0.3222 
## F-statistic: 17.64 on 1 and 34 DF,  p-value: 0.0001819
shapiro.test(residuals(lm(env.genomeTraits$cazy.mgm.H~env.genomeTraits$AGS)))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(lm(env.genomeTraits$cazy.mgm.H ~ env.genomeTraits$AGS))
## W = 0.97231, p-value = 0.4918
#Supplementary FIg.7
library(ggpubr)
ggarrange(p1,p3,p5,p7,
          p1.1,p3.1,p5.1,p7.1,
          ncol = 4,nrow = 2,widths=c(1,1),heights=c(1,1),common.legend = TRUE,legend = "right",
          labels = c("a","b","c","d","e","f","g","h"))

#ggsave("pdf/Supplementary.Fig2_richness&diversity.pdf",plot = last_plot(),units = "in",height = 7,width = 16,dpi = 600)
#Supplementary Fig. 8
library(linkET)
library(dplyr)

colnames(bac.amp1) == colnames(ko.mgm1)
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE
bac.amp2<-data.frame(t(bac.amp1))
bac.amp2<-data.frame(apply(bac.amp2,1,function(x){x/sum(x)}))

ko.mgm2<-data.frame(t(ko.mgm1))
ko.mgm2<-data.frame(apply(ko.mgm2,1,function(x){x/sum(x)}))

bac.ko<-cbind(data.frame(t(bac.amp1)),data.frame(t(ko.mgm1)))

env.mantel<-env.genomeTraits[,c("AGS","ACN","GC_percent","Growthrate",
                                "bac.amp.S","bac.amp.H","ko.mgm.S","ko.mgm.H",
                                "latitude","longitude","altitude","MAP","MAT",
                                "TC","TN","TP","pH","ACa","AMg","AFe", "AK","C_N",
                                "C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich")]

colnames(env.mantel)<-c("AGS","ACN","GC content","Doubling time",
                        "S.16S","H'.16S","S.KO","H'.KO",
                        "Latitude","Longitude","Altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe", "AK","C_N","C_P", "N_P", "Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness")
#plot
mantel <- mantel_test(bac.ko, env.mantel,
                      spec_select = list(BacteriaCom = 1:22579,
                                         FunctionCom = 22580:33644),
                      spec_dist =  dist_func(.FUN = "vegdist", method = "bray"),
                      env_dist = dist_func(.FUN = "vegdist", method = "euclidean")) %>% 
  mutate(rd = cut(r, breaks = c(-Inf, 0.2, 0.4, Inf),
                  labels = c("< 0.2", "0.2 - 0.4", ">= 0.4")),
         pd = cut(p, breaks = c(-Inf, 0.01, 0.05, Inf),
                  labels = c("< 0.01", "0.01 - 0.05", ">= 0.05")))

qcorrplot(correlate(env.mantel,method = "spearman"),type = "lower", diag = FALSE,
          grid_size = 0.25) +
  geom_square() +
  geom_mark(sep = '\n',size=3,sig_level = c(0.05,0.01,0.001),sig_thres = 0.05)+
  geom_couple(aes(colour = pd, size = rd), data = mantel, curvature = 0.1) +
  scale_fill_gradientn(colours = RColorBrewer::brewer.pal(11, "RdYlBu")) +
  #scale_fill_gradient(guide = "legend", high='#2c7bb6', low='#d7191c',name="rho")+
  scale_size_manual(values = c(0.5, 1, 2)) +
  scale_colour_manual(values = c("#f6e8c3", "#c7eae5", "#A2A2A288")) +
  #scale_colour_manual(values = color_pal(3, 0)) +
  guides(size = guide_legend(title = "Mantel's r",
                             override.aes = list(colour = "grey35"), 
                             order = 2),
         colour = guide_legend(title = "Mantel's p", 
                               override.aes = list(size = 3), 
                               order = 1),
         fill = guide_colorbar(title = "Spearman's rho", order = 3))

#Supplementary Fig. 9
p1<-ggplot() + geom_jitter(data=env.amp1,  height = 0, aes(x= pH, y=bac.mgm.H, color = site.latitude), alpha = 0.8, size =5) + 
  #geom_smooth(data=env.amp1,aes(x= pH, y=bac.mgm.H), method ="lm", col= "white")+
  labs(x = "Soil pH",y = "H'.Bacteria.mgm")+
  theme_bw()+
  scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"))+
  theme(legend.title = element_text(colour="black", size=15, face="bold"),
        legend.text = element_text(colour="black", size=15, face="bold"),
        axis.text=element_text(size=15,face="bold"),
        axis.title=element_text(size=15,face="bold"))+
  annotate("text",x=6.5,y=6.3, label="R2 = 0.009\nP = 0.253",
           size=5,color="black")
p1

#ggsave("pdf2/Fig.S2A.corr.pH.H16S.2023.04.18.pdf", width = 6, height = 4)
summary(lm(env.amp1$bac.mgm.H~env.amp1$pH))
## 
## Call:
## lm(formula = env.amp1$bac.mgm.H ~ env.amp1$pH)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.63980 -0.18019  0.01608  0.20693  0.48888 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  6.34786    0.23546  26.960   <2e-16 ***
## env.amp1$pH  0.05446    0.04692   1.161    0.254    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2872 on 34 degrees of freedom
## Multiple R-squared:  0.03812,    Adjusted R-squared:  0.009826 
## F-statistic: 1.347 on 1 and 34 DF,  p-value: 0.2538
shapiro.test(residuals(lm(env.amp1$bac.mgm.H~env.amp1$pH)))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(lm(env.amp1$bac.mgm.H ~ env.amp1$pH))
## W = 0.98188, p-value = 0.807
library(lme4)
## 载入需要的程辑包:Matrix
## Warning: 程辑包'Matrix'是用R版本4.2.2 来建造的
## 
## 载入程辑包:'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
library(lmerTest)
## 
## 载入程辑包:'lmerTest'
## The following object is masked from 'package:lme4':
## 
##     lmer
## The following object is masked from 'package:stats':
## 
##     step
library(sjPlot)
## Install package "strengejacke" from GitHub (`devtools::install_github("strengejacke/strengejacke")`) to load all sj-packages at once!
lm<-lmer(bac.amp.H~1+pH+(1|latitude),data = env.amp1,
         REML = FALSE,control=lmerControl(optimizer="bobyqa"))

shapiro.test(residuals(lm))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(lm)
## W = 0.97453, p-value = 0.5614
summary(lm)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
##   method [lmerModLmerTest]
## Formula: bac.amp.H ~ 1 + pH + (1 | latitude)
##    Data: env.amp1
## Control: lmerControl(optimizer = "bobyqa")
## 
##      AIC      BIC   logLik deviance df.resid 
##     -0.6      5.8      4.3     -8.6       32 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.8410 -0.6192 -0.1238  0.4461  1.9018 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  latitude (Intercept) 0.009959 0.09979 
##  Residual             0.038044 0.19505 
## Number of obs: 36, groups:  latitude, 12
## 
## Fixed effects:
##             Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)  5.57559    0.21061 13.64329   26.47 4.01e-13 ***
## pH           0.21013    0.04194 13.72478    5.01 0.000203 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##    (Intr)
## pH -0.979
tab_model(lm)
  bac.amp.H
Predictors Estimates CI p
(Intercept) 5.58 5.15 – 6.00 <0.001
pH 0.21 0.12 – 0.30 <0.001
Random Effects
σ2 0.04
τ00 latitude 0.01
ICC 0.21
N latitude 12
Observations 36
Marginal R2 / Conditional R2 0.496 / 0.601
p2<-ggplot() + geom_jitter(data=env.amp1,  height = 0, aes(x= pH, y=bac.mgm.S, color = site.latitude), alpha = 0.8, size =5) + 
  #geom_smooth(data=env.amp1,aes(x= pH, y=bac.mgm.S), method ="loess", col= "white")+
  labs(x = "Soil pH",y = "S.Bacteria.mgm")+
  theme_bw()+
  scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"))+
  theme(legend.title = element_text(colour="black", size=15, face="bold"),
        legend.text = element_text(colour="black", size=15, face="bold"),
        axis.text=element_text(size=15,face="bold"),
        axis.title=element_text(size=15,face="bold"))+
  annotate("text",x=6.5,y=33500, label="R2 = 0.0184\nP = 0.429",
           size=5,color="black")
p2

#ggsave("pdf2/Fig.S2B.corr.pH.H16S.2023.04.18.pdf", width = 6, height = 4)
summary(lm(env.amp1$bac.mgm.S~env.amp1$pH))
## 
## Call:
## lm(formula = env.amp1$bac.mgm.S ~ env.amp1$pH)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -827.16 -280.83    0.17  155.35  958.18 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 33869.01     322.51   105.0   <2e-16 ***
## env.amp1$pH    51.42      64.27     0.8    0.429    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 393.3 on 34 degrees of freedom
## Multiple R-squared:  0.01848,    Adjusted R-squared:  -0.01039 
## F-statistic: 0.6402 on 1 and 34 DF,  p-value: 0.4292
shapiro.test(residuals(lm(env.amp1$bac.mgm.S~env.amp1$pH)))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(lm(env.amp1$bac.mgm.S ~ env.amp1$pH))
## W = 0.96964, p-value = 0.4158
#Supplementary Fig. 10
env.amp1$sample_name==env.genomeTraits$sample_name
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE
env.tmp1<-env.amp1[,c("latitude","longitude","altitude","MAP","MAT",
                     "TC","TN","TP","pH","ACa","AMg","AFe","AK","C_N","C_P", "N_P", "soil_D",
                     "AllP_abu","AllP_bsa","AllP_rich")]

env.tmp2 <- data.frame(env.amp1[,c("bac.mgm.H","bac.mgm.S","bac.amp.H","bac.amp.S", "ko.mgm.H","ko.mgm.S","rgi.mgm.H","rgi.mgm.S", "cazy.mgm.H","cazy.mgm.S")])

spman.d12 = corr.test(env.tmp1, env.tmp2,use="pairwise",method="spearman",adjust="fdr",alpha=.05,ci=FALSE)


r<-data.frame(spman.d12$r)
r$X<-row.names(r)
r.long <- gather(r, Y, r, 1 : ncol(r)-1, factor_key=TRUE)
p<-data.frame(spman.d12$p)
p$X<-row.names(p)
p.long <- gather(p, Y, p, 1 : ncol(r)-1, factor_key=TRUE)
cor.out<-cbind(r.long,p.long$p)
cor.out$r <- round(cor.out$r, 2)


cor.out$X<- factor(cor.out$X, 
                   levels = c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe",
                              "AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich"))

cor.out$X<- factor(cor.out$X, 
                   labels = c("Latitude","Longitude","Altitude","Mean annual precipitation","Mean annual temperature",
                              "Total Carbon","Soil total Nitrogen","Soil total Phosphorus","Soil pH","Soil available Calcium",
                              "Soil available Magnesium", "Soil available Ferrum", "Soil available Potassium", "Carbon : Nitrogen ratio", "Carbon : Phosphorus ratio", "Nitrogen: Phosphorus ratio", 
                              "Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness"))

cor.out$Y <- factor(cor.out$Y, levels = c("bac.mgm.H","bac.mgm.S","bac.amp.H","bac.amp.S", "ko.mgm.H","ko.mgm.S","rgi.mgm.H","rgi.mgm.S", "cazy.mgm.H","cazy.mgm.S"))

cor.out$Y <- factor(cor.out$Y, labels = c("H'.bacteria.mgm","S.bacteria.mgm","H'.16S","S.16S","H'.KO","S.KO","H'.ARG","S.ARG","H'.Cazy","S.Cazy"))

ggplot(cor.out, aes(Y,X)) + 
  geom_tile(aes(fill = r), size=1)+
  geom_text(aes(label = r), size = 1.5)+
  #scale_fill_gradient(guide = "legend", high='green', low='blue')+
  scale_fill_gradient2(guide = "legend", high='#2c7bb6',mid="white", low='#d7191c',name="rho")+
  theme(axis.title= element_blank(),
        axis.text.x=element_text(colour="black", size=8, face="bold",angle = 20),
        axis.text.y=element_text(colour="black", size=8, face="bold"))

cor.out.sig <- cor.out
cor.out.sig$r[cor.out.sig$p > 0.01] <- 0

p4<-ggplot(cor.out.sig, aes(Y,X)) + 
  geom_tile(aes(fill = r), size=1)+
  geom_text(data=  cor.out.sig[cor.out.sig$r!=0,], aes(label = r), size = 2.5, font="bold")+theme_bw()+
  #scale_fill_gradient(guide = "legend", high='green', low='blue',name="rho")+
  scale_fill_gradient2(guide = "legend", high='#2c7bb6',mid="white", low='#d7191c',name="rho")+
   geom_hline(yintercept = c(8.5,9.5), color = "orange")+
   theme(axis.title= element_blank(),
        axis.text.x=element_text(colour="black", size=12, face="bold",angle = 90, hjust=1,vjust = 0.5),
        axis.text.y=element_text(colour="black", size=12, face="bold"))
p4

#ggsave("pdf/Supplementary_Fig.9.pdf", width = 8, height = 6)
#Supplementary Fig. 11
p1<-ggplot() + geom_jitter(data=env.genomeTraits,  height = 0, aes( x=bac.amp.S,y= AGS, color = site.latitude), alpha = 0.8, size =5) + 
  geom_smooth(data=env.genomeTraits,aes(y= AGS, x=bac.amp.S), method ="lm", col= "white")+
  labs(y = "Average Genome Size (Mbp)",x = "S.16S")+
  theme_bw()+
  scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"))+
  theme(legend.title = element_text(colour="black", size=15, face="bold"),
        legend.text = element_text(colour="black", size=15, face="bold"),
        axis.text=element_text(size=15,face="bold"),
        axis.title=element_text(size=15,face="bold"))+
  annotate("text",x=4000,y=6.5, label="R2 = 0.187\nP = 0.0049",
           size=5,color="black")
p1
## `geom_smooth()` using formula = 'y ~ x'

summary(lm(env.genomeTraits$AGS~env.genomeTraits$bac.amp.S))
## 
## Call:
## lm(formula = env.genomeTraits$AGS ~ env.genomeTraits$bac.amp.S)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.74244 -0.56667 -0.09104  0.62071  1.85780 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 6.8181785  0.8744669   7.797  4.5e-09 ***
## env.genomeTraits$bac.amp.S -0.0007490  0.0002489  -3.009  0.00491 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.975 on 34 degrees of freedom
## Multiple R-squared:  0.2103, Adjusted R-squared:  0.1871 
## F-statistic: 9.054 on 1 and 34 DF,  p-value: 0.004911
shapiro.test(residuals(lm(env.genomeTraits$AGS~env.genomeTraits$bac.amp.S)))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(lm(env.genomeTraits$AGS ~ env.genomeTraits$bac.amp.S))
## W = 0.97275, p-value = 0.5052
p2<-ggplot() + geom_jitter(data=env.genomeTraits,  height = 0, aes( x=bac.amp.H,y= AGS, color = site.latitude), alpha = 0.8, size =5) + 
  geom_smooth(data=env.genomeTraits,aes(y= AGS, x=bac.amp.H), method ="lm", col= "white")+
  labs(y = "Average Genome Size (Mbp)",x = "H'.16S")+
  theme_bw()+
  scale_colour_manual(values= c(blue2red(15)[1:5], blue2red(17)[11:15], "green", "black"))+
  theme(legend.title = element_text(colour="black", size=15, face="bold"),
        legend.text = element_text(colour="black", size=15, face="bold"),
        axis.text=element_text(size=15,face="bold"),
        axis.title=element_text(size=15,face="bold"))+
  annotate("text",x=7,y=6.5, label="R2 = 0.222\nP = 0.0021",
           size=5,color="black")
p2
## `geom_smooth()` using formula = 'y ~ x'

summary(lm(env.genomeTraits$AGS~env.genomeTraits$bac.amp.H))
## 
## Call:
## lm(formula = env.genomeTraits$AGS ~ env.genomeTraits$bac.amp.H)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.6137 -0.5946 -0.1699  0.5811  2.0425 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 15.5164     3.4087   4.552 6.49e-05 ***
## env.genomeTraits$bac.amp.H  -1.7076     0.5153  -3.314  0.00219 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9539 on 34 degrees of freedom
## Multiple R-squared:  0.2441, Adjusted R-squared:  0.2219 
## F-statistic: 10.98 on 1 and 34 DF,  p-value: 0.002193
shapiro.test(residuals(lm(env.genomeTraits$AGS~env.genomeTraits$bac.amp.H)))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(lm(env.genomeTraits$AGS ~ env.genomeTraits$bac.amp.H))
## W = 0.96019, p-value = 0.2179
#Supplementary Fig. 11
library(ggpubr)
ggarrange(p1,p2,
          ncol = 2,nrow = 1,widths=c(1,1),heights=c(1,1),common.legend = TRUE,legend = "right",
          labels = c("a","b"))
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

#ggsave("pdf/Supplementary.Fig.3_16S&AGS.pdf",plot = last_plot(),units = "in",height = 3.5,width = 8,dpi = 600)
#Supplementary Fig. 12a
env.tmp1<-env.amp1[,c("latitude","longitude","altitude","MAP","MAT",
                      "TC","TN","TP","pH","ACa","AMg","AFe","AK","C_N","C_P", "N_P", "soil_D",
                      "AllP_abu","AllP_bsa","AllP_rich")]

env.tmp2 <- data.frame(t(cog.mgm1))

spman.d12 = corr.test(env.tmp1, env.tmp2,use="pairwise",method="spearman",adjust="fdr",alpha=.05,ci=FALSE)


r<-data.frame(spman.d12$r)
r[is.na(r)] <-0
r$X<-row.names(r)

r.long <- gather(r, Y, r, 1 : ncol(r)-1,  factor_key=TRUE)
p<-data.frame(spman.d12$p)
p$X<-row.names(p)
p.long <- gather(p, Y, p, 1 : ncol(p)-1, factor_key=TRUE)
cor.out<-cbind(r.long,p.long$p)
cor.out$r <- round(as.numeric(cor.out$r), 2)
str(cor.out$r)
##  num [1:480] 0.2 0.15 0.03 -0.43 -0.12 -0.08 -0.01 0.24 0.58 0.49 ...
cor.out$X<- factor(cor.out$X, 
                   levels = c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe",
                              "AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich"))
cor.out$X<- factor(cor.out$X, 
                   labels = c("Latitude","Longitude","Altitude","Mean annual precipitation","Mean annual temperature",
                              "Total Carbon","Soil total Nitrogen","Soil total Phosphorus","Soil pH","Soil available Calcium",
                              "Soil available Magnesium", "Soil available Ferrum", "Soil available Potassium", "Carbon : Nitrogen ratio", "Carbon : Phosphorus ratio", "Nitrogen: Phosphorus ratio", 
                              "Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness"))

library(splitstackshape)

cor.out.sig <- cor.out
cor.out.sig$r[cor.out.sig$p > 0.01] <- 0
cor.out.sig$Y_2<-"COGs"

ggplot(cor.out.sig, aes(Y,X)) +
  facet_grid(.~ Y_2,  scales = "free", space = "free" )+
  geom_tile(aes(fill = r), size=0)+
  geom_text(data=  cor.out.sig[cor.out.sig$r!=0,], aes(label = r), size = 2, font="bold")+scale_fill_gradient2(guide = "legend", high='#2c7bb6',mid="white", low='#d7191c',name="rho")+
   geom_hline(yintercept = c(8.5,9.5), color = "orange")+
  theme(axis.title= element_blank(),
        strip.text = element_text(colour="black",size = 12),
        axis.text.x=element_text(colour="black", size=12, face="bold"),
        axis.text.y=element_text(colour="black", size=12, face="bold"))

#ggsave("pdf/Supplementary.Fig10a.env.cog.genes.2023.01.05.pdf", width = 10, height = 4)
#Supplementary Fig. 12b
env.tmp1<-env.amp1[,c("latitude","longitude","altitude","MAP","MAT",
                      "TC","TN","TP","pH","ACa","AMg","AFe","AK","C_N","C_P", "N_P", "soil_D",
                      "AllP_abu","AllP_bsa","AllP_rich")]

env.tmp2 <- data.frame(t(ko.mgm1))

spman.d12 = corr.test(env.tmp1, env.tmp2,use="pairwise",method="spearman",adjust="fdr",alpha=.05,ci=FALSE)


r<-data.frame(spman.d12$r)
r[is.na(r)] <-0
r$X<-row.names(r)

r.long <- gather(r, Y, r, 1 : ncol(r)-1,  factor_key=TRUE)
p<-data.frame(spman.d12$p)
p$X<-row.names(p)
p.long <- gather(p, Y, p, 1 : ncol(p)-1, factor_key=TRUE)
cor.out<-cbind(r.long,p.long$p)
cor.out$r <- round(as.numeric(cor.out$r), 2)
str(cor.out$r)
##  num [1:221300] -0.06 -0.01 0.25 0.13 -0.03 -0.19 -0.28 -0.33 -0.31 -0.2 ...
cor.out$X<- factor(cor.out$X, 
                   levels = c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe",
                              "AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich"))

cor.out$X<- factor(cor.out$X, 
                   labels = c("Latitude","Longitude","Altitude","Mean annual precipitation","Mean annual temperature",
                              "Total Carbon","Soil total Nitrogen","Soil total Phosphorus","Soil pH","Soil available Calcium",
                              "Soil available Magnesium", "Soil available Ferrum", "Soil available Potassium", "Carbon : Nitrogen ratio", "Carbon : Phosphorus ratio", "Nitrogen: Phosphorus ratio", 
                              "Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness"))

library(splitstackshape)

cor.out.sig <- cor.out
cor.out.sig$r[cor.out.sig$p > 0.01] <- 0
cor.out.sig$Y_2<-"All annotated KOs"
  
ggplot(cor.out.sig, aes(Y,X)) +
  facet_grid(.~ Y_2,  scales = "free", space = "free" )+
  geom_tile(aes(fill = r), size=0)+scale_fill_gradient2(guide = "legend", high='#2c7bb6',mid="white", low='#d7191c',name="rho")+
   geom_hline(yintercept = c(8.5,9.5), color = "orange")+
  theme(strip.text = element_text(size = 12,face="bold", color = c("black")),
        #strip.background = element_rect(fill = "grey"),
        panel.spacing = unit(0.1, "lines"),
        axis.title= element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        axis.text.y=element_text(colour="black", size=15, face="bold"))

#ggsave("pdf/Supplementary.Fig10b.corrHeatmap.env.ko.genes.2023.01.05.pdf", width = 20, height = 5)
#Supplementary Fig. 13
ko.ID.sub<-read_excel("data/KEGG.123.2022.12.09.xlsx",sheet = "r.k_4")
ko.ID.sub <- merge(ko.ID.sub, ko.mgm.ID0, by.x = "KO", by.y = "Category", all = F)
ko.mgm1$KO <- row.names(ko.mgm1)
ko.mgm.sub0 <- merge(ko.ID.sub,ko.mgm1, by = "KO", all = T )
ko.mgm.sub0 <- ko.mgm.sub0[!is.na(ko.mgm.sub0$Level1),]
ko.mgm.sub0 <- ko.mgm.sub0[ order(ko.mgm.sub0$No),]
row.names(ko.mgm.sub0) <- paste0(ko.mgm.sub0$KO,"_", ko.mgm.sub0$No, "_", ko.mgm.sub0$Level1, "_", ko.mgm.sub0$Level2 , "_", ko.mgm.sub0$Level3)

ko.sel <-ko.mgm.sub0[, c(-1:-6)]
ko.sel.ID <- ko.mgm.sub0[, c(1:6)]
colnames(ko.sel) == env.amp1$sample_name
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE
env.tmp1<-env.amp1[,c("latitude","longitude","altitude","MAP","MAT",
                      "TC","TN","TP","pH","ACa","AMg","AFe","AK","C_N","C_P", "N_P", "soil_D",
                      "AllP_abu","AllP_bsa","AllP_rich")]

env.tmp2 <- data.frame(t(ko.sel))

spman.d12 = corr.test(env.tmp1, env.tmp2,use="pairwise",method="spearman",adjust="fdr",alpha=.05,ci=FALSE)


r<-data.frame(spman.d12$r)
r[is.na(r)] <-0
r$X<-row.names(r)

r.long <- gather(r, Y, r, 1 : ncol(r)-1,  factor_key=TRUE)
p<-data.frame(spman.d12$p)
p$X<-row.names(p)
p.long <- gather(p, Y, p, 1 : ncol(p)-1, factor_key=TRUE)
cor.out<-cbind(r.long,p.long$p)
cor.out$r <- round(as.numeric(cor.out$r), 2)
str(cor.out$r)
##  num [1:61700] 0.56 0.51 0.34 -0.21 -0.5 0.35 0.09 0.06 -0.23 0.11 ...
cor.out$X<- factor(cor.out$X, 
                   levels = c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe",
                              "AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich"))

cor.out$X<- factor(cor.out$X, 
                   labels = c("Latitude","Longitude","Altitude","Mean annual precipitation","Mean annual temperature",
                              "Total Carbon","Soil total Nitrogen","Soil total Phosphorus","Soil pH","Soil available Calcium",
                              "Soil available Magnesium", "Soil available Ferrum", "Soil available Potassium", "Carbon : Nitrogen ratio", "Carbon : Phosphorus ratio", "Nitrogen: Phosphorus ratio", 
                              "Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness"))

library(splitstackshape)

cor.out<-cSplit(cor.out, "Y", "_")

cor.out$Y_4 <- factor(cor.out$Y_4, levels = c("Bacterial.secretion.system","Cell.motility","Xenobiotics.biodegradation.and.metabolism","Signal.transduction","Metabolism.of.terpenoids.and.polyketides","Glycan.biosynthesis.and.metabolism","Porphyrin.metabolism",
                                              "Energy.metabolism","Membrane.transport","Citrate.cycle","Glyoxylate.and.dicarboxylate.metabolism","Metabolism.of.other.amino.acids"))

cor.out$Y_4 <- factor(cor.out$Y_4,labels = c("BSS","CM","XenoBDM","Two component system","MTP","GBM","PM",
                                             "Energy metabolism","Membrane transport","CC","GDM","MAC"))


cor.out.sub<-cor.out[cor.out$Y_4=="GBM",]
levels(droplevels(factor(cor.out.sub$Y_5)))
##  [1] "Arabinogalactan.biosynthesis.Mycobacterium"                         
##  [2] "Exopolysaccharide.biosynthesis"                                     
##  [3] "Glycosaminoglycan.biosynthesis.chondroitin.sulfate.dermatan.sulfate"
##  [4] "Glycosaminoglycan.biosynthesis.heparan.sulfate.heparin"             
##  [5] "Glycosaminoglycan.degradation"                                      
##  [6] "Glycosphingolipid.biosynthesis.ganglio.series"                      
##  [7] "Glycosphingolipid.biosynthesis.globo.and.isoglobo.series"           
##  [8] "Glycosphingolipid.biosynthesis.lacto.and.neolacto.series"           
##  [9] "Glycosylphosphatidylinositol.anchor.biosynthesis"                   
## [10] "Lipoarabinomannan.biosynthesis"                                     
## [11] "Lipopolysaccharide.biosynthesis"                                    
## [12] "Mannose.type.O.glycan.biosynthesis"                                 
## [13] "N.Glycan.biosynthesis"                                              
## [14] "O.Antigen.nucleotide.sugar.biosynthesis"                            
## [15] "O.Antigen.repeat.unit.biosynthesis"                                 
## [16] "Other.glycan.degradation"                                           
## [17] "Other.types.of.O.glycan.biosynthesis"                               
## [18] "Peptidoglycan.biosynthesis"                                         
## [19] "Teichoic.acid.biosynthesis"                                         
## [20] "Various.types.of.N.glycan.biosynthesis"
cor.out.sub$Y_5 <- factor(cor.out.sub$Y_5, levels = c( "Arabinogalactan.biosynthesis.Mycobacterium", "Exopolysaccharide.biosynthesis", "Glycosaminoglycan.biosynthesis.chondroitin.sulfate.dermatan.sulfate", 
                                                       "Glycosaminoglycan.biosynthesis.heparan.sulfate.heparin", "Glycosaminoglycan.degradation", "Glycosphingolipid.biosynthesis.ganglio.series", 
                                                       "Glycosphingolipid.biosynthesis.globo.and.isoglobo.series", "Glycosphingolipid.biosynthesis.lacto.and.neolacto.series", "Glycosylphosphatidylinositol.anchor.biosynthesis", 
                                                       "Lipoarabinomannan.biosynthesis", "Lipopolysaccharide.biosynthesis", "O.Antigen.nucleotide.sugar.biosynthesis",  
                                                       "O.Antigen.repeat.unit.biosynthesis", "Mannose.type.O.glycan.biosynthesis", 
                                                       "N.Glycan.biosynthesis","Peptidoglycan.biosynthesis","Other.glycan.degradation", "Other.types.of.O.glycan.biosynthesis", "Various.types.of.N.glycan.biosynthesis", 
                                                        "Teichoic.acid.biosynthesis"))

cor.out.sub$Y_5 <- factor(cor.out.sub$Y_5, labels = c( "ABM", "Exopolysaccharide.biosynthesis", "C", 
                                                       "H", "GD", "S", 
                                                       "GI", "L", "A", 
                                                       "Lipoarabinomannan", "Lipopolysaccharide.biosynthesis", "O.Antigen.nucleotide.sugar.biosynthesis", "ARUB","M.G", 
                                                       "N.Glycan", "Peptidoglycan.biosynthesis", 
                                                       "Other.glycan.D", "O", "VN.glycanB", 
                                                       "Teichoic.acid.biosyn"))

p1<-ggplot(cor.out.sub, aes(Y_1,X)) +
  facet_grid(.~ Y_5,  scales = "free", space = "free" )+
  geom_tile(aes(fill = r), size=0)+
  scale_fill_gradient2(guide = "legend", high='#2c7bb6',mid="white", low='#d7191c',name="rho")+
  geom_hline(yintercept = c(8.5,9.5), color = "orange")+
  theme(strip.text = element_text(size = 10,face="bold", color = c("white")),
        strip.background = element_rect(fill = "red"),
        panel.spacing = unit(0.1, "lines"), axis.title= element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        axis.text.y=element_text(colour="black", size=18, face="bold")) +
  labs(caption="ABM: Arabinogalactan.biosynthesis.Mycobacterium;C: Glycosaminoglycan.biosynthesis.chondroitin.sulfate.dermatan.sulfate; H: Glycosaminoglycan.biosynthesis.heparan.sulfate.heparin; GD: Glycosaminoglycan.degradation; S: Glycosphingolipid.biosynthesis.ganglio.series; GI: Glycosphingolipid.biosynthesis.globo.and.isoglobo.series; L: Glycosphingolipid.biosynthesis.lacto.and.neolacto.series; 
       A: Glycosylphosphatidylinositol.anchor.biosynthesis; Lipoarabinomannan: Lipoarabinomannan.biosynthesis;ARUB: O.Antigen.repeat.unit.biosynthesis; M.G: Mannose.type.O.glycan.biosynthesis; N.Glycan: N.Glycan.biosynthesis; Other.glycan.D: Other.glycan.degradation; O: Other.types.of.O.glycan.biosynthesis; VN.glycanB: Various.types.of.N.glycan.biosynthesis,Teichoic.acid.biosyn: Teichoic.acid.biosynthesis" )
p1

#ggsave("pdf2/Fig.S6a.corrHeatmap.env.Glycan.2023.01.05.pdf", width = 30, height = 7)

env.tmp1<-env.amp1[,c("latitude","longitude","altitude","MAP","MAT",
                      "TC","TN","TP","pH","ACa","AMg","AFe","AK","C_N","C_P", "N_P", "soil_D",
                      "AllP_abu","AllP_bsa","AllP_rich")]

env.tmp2 <- data.frame(t(cazy.mgm1))

spman.d12 = corr.test(env.tmp1, env.tmp2,use="pairwise",method="spearman",adjust="fdr",alpha=.05,ci=FALSE)

r<-data.frame(spman.d12$r)
r[is.na(r)] <-0
r$X<-row.names(r)

r.long <- gather(r, Y, r, 1 : ncol(r)-1,  factor_key=TRUE)
p<-data.frame(spman.d12$p)
p$X<-row.names(p)
p.long <- gather(p, Y, p, 1 : ncol(p)-1, factor_key=TRUE)
cor.out<-cbind(r.long,p.long$p)
cor.out$r <- round(as.numeric(cor.out$r), 2)
str(cor.out$r)
##  num [1:2780] 0.24 0.18 -0.22 -0.48 -0.11 0.09 0.12 0.35 0.51 0.61 ...
cor.out$X<- factor(cor.out$X, 
                   levels = c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe",
                              "AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich"))

cor.out$X<- factor(cor.out$X, 
                   labels = c("Latitude","Longitude","Altitude","Mean annual precipitation","Mean annual temperature",
                              "Total Carbon","Soil total Nitrogen","Soil total Phosphorus","Soil pH","Soil available Calcium",
                              "Soil available Magnesium", "Soil available Ferrum", "Soil available Potassium", "Carbon : Nitrogen ratio", "Carbon : Phosphorus ratio", "Nitrogen: Phosphorus ratio", 
                              "Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness"))

library(splitstackshape)

cor.out.sig <- cor.out
cor.out.sig$r[cor.out.sig$p > 0.01] <- 0
cor.out.sig$Y_2<-"Carbohydrate-active enzymes (CAZy)"

p2<-ggplot(cor.out.sig, aes(Y,X)) +
  facet_grid(.~ Y_2,  scales = "free", space = "free" )+
  geom_tile(aes(fill = r), size=0)+
  geom_text(data=  cor.out.sig[cor.out.sig$r!=0,], aes(label = r), size = 1.5, font="bold")+
  scale_fill_gradient2(guide = "legend", high='#2c7bb6',mid="white", low='#d7191c',name="rho")+
  geom_hline(yintercept = c(8.5,9.5), color = "orange")+
  theme(strip.text = element_text(size = 12,face="bold", color = c("white")),
        strip.background = element_rect(fill = "red"),
        axis.title= element_blank(),
        axis.text.x=element_text(colour="black", size=12, face="bold",angle = 90, hjust=1,vjust = 0.5),
        axis.text.y=element_text(colour="black", size=18, face="bold"))
p2

#ggsave("pdf2/Fig.S6b.corrHeatmap.env.cazy.genes.2023.01.05.pdf", width = 20, height = 6)
#Supplementary Fig. 13
library(ggpubr)
ggarrange(p1,p2,
          ncol = 1,nrow = 2,widths=c(1,1),heights=c(1,1),common.legend = TRUE,legend = "right",
          labels = c("a","b"))

#ggsave("pdf/Supplementary.Fig.S12.pdf",plot = last_plot(),units = "in",height = 13,width = 30,dpi = 600)
#Supplementary Fig. 14
ko.ID.sub<-read_excel("data/KEGG.123.2022.12.09.xlsx",sheet = "r.k_4")
ko.ID.sub <- merge(ko.ID.sub, ko.mgm.ID0, by.x = "KO", by.y = "Category", all = F)
ko.mgm1$KO <- row.names(ko.mgm1)
ko.mgm.sub0 <- merge(ko.ID.sub,ko.mgm1, by = "KO", all = T )
ko.mgm.sub0 <- ko.mgm.sub0[!is.na(ko.mgm.sub0$Level1),]
ko.mgm.sub0 <- ko.mgm.sub0[ order(ko.mgm.sub0$No),]
row.names(ko.mgm.sub0) <- paste0(ko.mgm.sub0$KO,"_", ko.mgm.sub0$No, "_", ko.mgm.sub0$Level1, "_", ko.mgm.sub0$Level2 , "_", ko.mgm.sub0$Level3)


ko.sel <-ko.mgm.sub0[, c(-1:-6)]
ko.sel.ID <- ko.mgm.sub0[, c(1:6)]
colnames(ko.sel) == env.amp1$sample_name
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE
env.tmp1<-env.amp1[,c("latitude","longitude","altitude","MAP","MAT",
                      "TC","TN","TP","pH","ACa","AMg","AFe","AK","C_N","C_P", "N_P", "soil_D",
                      "AllP_abu","AllP_bsa","AllP_rich")]

env.tmp2 <- data.frame(t(ko.sel))

spman.d12 = corr.test(env.tmp1, env.tmp2,use="pairwise",method="spearman",adjust="fdr",alpha=.05,ci=FALSE)


r<-data.frame(spman.d12$r)
r[is.na(r)] <-0
r$X<-row.names(r)

r.long <- gather(r, Y, r, 1 : ncol(r)-1,  factor_key=TRUE)
p<-data.frame(spman.d12$p)
p$X<-row.names(p)
p.long <- gather(p, Y, p, 1 : ncol(p)-1, factor_key=TRUE)
cor.out<-cbind(r.long,p.long$p)
cor.out$r <- round(as.numeric(cor.out$r), 2)
str(cor.out$r)
##  num [1:61700] 0.56 0.51 0.34 -0.21 -0.5 0.35 0.09 0.06 -0.23 0.11 ...
cor.out$X<- factor(cor.out$X, 
                   levels = c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe",
                              "AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich"))

cor.out$X<- factor(cor.out$X, 
                   labels = c("Latitude","Longitude","Altitude","Mean annual precipitation","Mean annual temperature",
                              "Total Carbon","Soil total Nitrogen","Soil total Phosphorus","Soil pH","Soil available Calcium",
                              "Soil available Magnesium", "Soil available Ferrum", "Soil available Potassium", "Carbon : Nitrogen ratio", "Carbon : Phosphorus ratio", "Nitrogen: Phosphorus ratio", 
                              "Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness"))

library(splitstackshape)

cor.out<-cSplit(cor.out, "Y", "_")

cor.out$Y_4 <- factor(cor.out$Y_4, levels = c("Bacterial.secretion.system","Cell.motility","Xenobiotics.biodegradation.and.metabolism","Signal.transduction","Metabolism.of.terpenoids.and.polyketides","Glycan.biosynthesis.and.metabolism","Porphyrin.metabolism",
                                              "Energy.metabolism","Membrane.transport","Citrate.cycle","Glyoxylate.and.dicarboxylate.metabolism","Metabolism.of.other.amino.acids"))

cor.out$Y_4 <- factor(cor.out$Y_4,labels = c("BSS","CM","XenoBDM","Two component system","MTP","GBM","PM",
                                             "Energy metabolism","Membrane transport","CC","GDM","MAC"))


cor.out.sub<-cor.out[cor.out$Y_4=="CM",]

levels(droplevels(factor(cor.out.sub$Y_5)))
## [1] "Bacterial.chemotaxis"             "Flagellar.assembly"              
## [3] "Regulation.of.actin.cytoskeleton"
cor.out.sub$Y_5 <- factor(cor.out.sub$Y_5, labels = c("Bacterial chemotaxis", "Flagellar assembly","RAC"))

p1<- ggplot(cor.out.sub, aes(Y_1,X)) +
  facet_grid(.~ Y_5,  scales = "free", space = "free" )+
  geom_tile(aes(fill = r), size=0)+
  scale_fill_gradient2(guide = "legend", high='#2c7bb6',mid="white", low='#d7191c',name="rho")+
  geom_hline(yintercept = c(8.5,9.5), color = "orange")+
  theme(strip.text = element_text(size = 10,face="bold", color = c("white")),
        strip.background = element_rect(fill = "red"),
        panel.spacing = unit(0.1, "lines"), axis.title= element_blank(),
        axis.text.x=element_text(colour="black", size=10, face="bold",angle = 90, hjust=1,vjust = 0.5),
        axis.text.y=element_text(colour="black", size=15, face="bold")) +
  labs(caption="RAC: Regulation of actin cytoskeleton")

p1

#ggsave("pdf2/Fig.S7a.Heatmap.KEGG.cellular.motility.2023.01.05.pdf", width = 20, height = 6)

ko.ID.sub<-read_excel("data/KEGG.123.2022.12.09.xlsx",sheet = "r.k_4")
ko.ID.sub <- merge(ko.ID.sub, ko.mgm.ID0, by.x = "KO", by.y = "Category", all = F)
ko.mgm1$KO <- row.names(ko.mgm1)
ko.mgm.sub0 <- merge(ko.ID.sub,ko.mgm1, by = "KO", all = T )
ko.mgm.sub0 <- ko.mgm.sub0[!is.na(ko.mgm.sub0$Level1),]
ko.mgm.sub0 <- ko.mgm.sub0[ order(ko.mgm.sub0$No),]
row.names(ko.mgm.sub0) <- paste0(ko.mgm.sub0$KO,"_", ko.mgm.sub0$No, "_", ko.mgm.sub0$Level1, "_", ko.mgm.sub0$Level2 , "_", ko.mgm.sub0$Level3)

ko.sel <-ko.mgm.sub0[, c(-1:-6)]
ko.sel.ID <- ko.mgm.sub0[, c(1:6)]
colnames(ko.sel) == env.amp1$sample_name
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE
env.tmp1<-env.amp1[,c("latitude","longitude","altitude","MAP","MAT",
                      "TC","TN","TP","pH","ACa","AMg","AFe","AK","C_N","C_P", "N_P", "soil_D",
                      "AllP_abu","AllP_bsa","AllP_rich")]

env.tmp2 <- data.frame(t(ko.sel))

spman.d12 = corr.test(env.tmp1, env.tmp2,use="pairwise",method="spearman",adjust="fdr",alpha=.05,ci=FALSE)


r<-data.frame(spman.d12$r)
r[is.na(r)] <-0
r$X<-row.names(r)

r.long <- gather(r, Y, r, 1 : ncol(r)-1,  factor_key=TRUE)
p<-data.frame(spman.d12$p)
p$X<-row.names(p)
p.long <- gather(p, Y, p, 1 : ncol(p)-1, factor_key=TRUE)
cor.out<-cbind(r.long,p.long$p)
cor.out$r <- round(as.numeric(cor.out$r), 2)
str(cor.out$r)
##  num [1:61700] 0.56 0.51 0.34 -0.21 -0.5 0.35 0.09 0.06 -0.23 0.11 ...
cor.out$X<- factor(cor.out$X, 
                   levels = c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe",
                              "AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich"))
cor.out$X<- factor(cor.out$X, 
                   labels = c("Latitude","Longitude","Altitude","Mean annual precipitation","Mean annual temperature",
                              "Total Carbon","Soil total Nitrogen","Soil total Phosphorus","Soil pH","Soil available Calcium",
                              "Soil available Magnesium", "Soil available Ferrum", "Soil available Potassium", "Carbon : Nitrogen ratio", "Carbon : Phosphorus ratio", "Nitrogen: Phosphorus ratio", 
                              "Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness"))

library(splitstackshape)

cor.out<-cSplit(cor.out, "Y", "_")

cor.out$Y_4 <- factor(cor.out$Y_4, levels = c("Bacterial.secretion.system","Cell.motility","Xenobiotics.biodegradation.and.metabolism","Signal.transduction","Metabolism.of.terpenoids.and.polyketides","Glycan.biosynthesis.and.metabolism","Porphyrin.metabolism",
                                              "Energy.metabolism","Membrane.transport","Citrate.cycle","Glyoxylate.and.dicarboxylate.metabolism","Metabolism.of.other.amino.acids"))

cor.out$Y_4 <- factor(cor.out$Y_4,labels = c("BSS","CM","XenoBDM","Two component system","MTP","GBM","PM",
                                             "Energy metabolism","Membrane transport","CC","GDM","MAC"))


cor.out.sub<-cor.out[cor.out$Y_4=="Two component system",]
levels(droplevels(factor(cor.out.sub$Y_5)))
## [1] "Two.component.system"
#cor.out.sub$Y_5 <- factor(cor.out.sub$Y_5, labels = c("Bacterial chemotaxis", "Flagellar assembly","RAC"))

p2<-ggplot(cor.out.sub, aes(Y_1,X)) +
 facet_grid(.~ Y_5,  scales = "free", space = "free" )+
  geom_tile(aes(fill = r), size=0)+
  scale_fill_gradient2(guide = "legend", high='#2c7bb6',mid="white", low='#d7191c',name="rho")+
  geom_hline(yintercept = c(8.5,9.5), color = "orange")+
  theme(strip.text = element_text(size = 12,face="bold",color = c("white")),
        strip.background = element_rect(fill = "red"),
        panel.spacing = unit(0.1, "lines"), axis.title= element_blank(),
        axis.text.x=element_text(colour="black", size=4, face="bold",angle = 90, hjust=1,vjust = 0.5),
        axis.text.y=element_text(colour="black", size=15, face="bold")) 
p2

#ggsave("pdf2/Fig.S7b.corrHeatmap.env.Signal.transduction.2023.01.05.pdf", width = 20, height = 6)

ko.ID.sub<-read_excel("data/KEGG.123.2022.12.09.xlsx",sheet = "r.k")
ko.ID.sub <- merge(ko.ID.sub, ko.mgm.ID0, by.x = "KO", by.y = "Category", all = F)
ko.mgm1$KO <- row.names(ko.mgm1)
ko.mgm.sub0 <- merge(ko.ID.sub,ko.mgm1, by = "KO", all = T )
ko.mgm.sub0 <- ko.mgm.sub0[!is.na(ko.mgm.sub0$Level1),]
ko.mgm.sub0 <- ko.mgm.sub0[ order(ko.mgm.sub0$No),]
row.names(ko.mgm.sub0) <- paste0(ko.mgm.sub0$KO,"_", ko.mgm.sub0$No, "_", ko.mgm.sub0$Level1, "_", ko.mgm.sub0$Level2 , "_", ko.mgm.sub0$Level3)

ko.sel <-ko.mgm.sub0[, c(-1:-6)]
ko.sel.ID <- ko.mgm.sub0[, c(1:6)]
colnames(ko.sel) == env.amp1$sample_name
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE
env.tmp1<-env.amp1[,c("latitude","longitude","altitude","MAP","MAT",
                      "TC","TN","TP","pH","ACa","AMg","AFe","AK","C_N","C_P", "N_P", "soil_D",
                      "AllP_abu","AllP_bsa","AllP_rich")]

env.tmp2 <- data.frame(t(ko.sel))

spman.d12 = corr.test(env.tmp1, env.tmp2,use="pairwise",method="spearman",adjust="fdr",alpha=.05,ci=FALSE)


r<-data.frame(spman.d12$r)
r[is.na(r)] <-0
r$X<-row.names(r)

r.long <- gather(r, Y, r, 1 : ncol(r)-1,  factor_key=TRUE)
p<-data.frame(spman.d12$p)
p$X<-row.names(p)
p.long <- gather(p, Y, p, 1 : ncol(p)-1, factor_key=TRUE)
cor.out<-cbind(r.long,p.long$p)
cor.out$r <- round(as.numeric(cor.out$r), 2)
str(cor.out$r)
##  num [1:83460] 0.56 0.51 0.34 -0.21 -0.5 0.35 0.09 0.06 -0.23 0.11 ...
cor.out$X<- factor(cor.out$X, 
                   levels = c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe",
                              "AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich"))
cor.out$X<- factor(cor.out$X, 
                   labels = c("Latitude","Longitude","Altitude","Mean annual precipitation","Mean annual temperature",
                              "Total Carbon","Soil total Nitrogen","Soil total Phosphorus","Soil pH","Soil available Calcium",
                              "Soil available Magnesium", "Soil available Ferrum", "Soil available Potassium", "Carbon : Nitrogen ratio", "Carbon : Phosphorus ratio", "Nitrogen: Phosphorus ratio", 
                              "Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness"))
library(splitstackshape)

cor.out<-cSplit(cor.out, "Y", "_")

cor.out$Y_4 <- factor(cor.out$Y_4, levels = c("Bacterial.secretion.system","Cell.motility", "Xenobiotics.biodegradation.and.metabolism","Signal.transduction","Cellular.community.prokaryotes","Pentose.and.glucuronate.interconversions",
                                              "Translation",   "Energy.metabolism",  "Membrane.transport" , "Folding.sorting.and.degradation",
                                              "Metabolism.of.cofactors.and.vitamins"  ))

cor.out$Y_4 <- factor(cor.out$Y_4, labels  = c("BSS","CM", "XenoBDM","Signal transduction","Cellular community","PGI",
                                               "Translation",   "Energy metabolism",  "Membrane transport" , "FSD",
                                               
                                               "Metabolism of cofactors and vitamins"  ))

cor.out.sub<-cor.out[cor.out$Y_4=="Cellular community",]
levels(droplevels(factor(cor.out.sub$Y_5)))
## [1] "Biofilm.formation" "Quorum.sensing"
p3<-ggplot(cor.out.sub, aes(Y_1,X)) +
  facet_grid(.~ Y_5,  scales = "free", space = "free" )+
  geom_tile(aes(fill = r), size=0)+
  scale_fill_gradient2(guide = "legend", high='#2c7bb6',mid="white", low='#d7191c',name="rho")+
  geom_hline(yintercept = c(8.5,9.5), color = "orange")+
  theme(strip.text = element_text(size = 12,face="bold", color = c("white")),
        strip.background = element_rect(fill = "red"),
        panel.spacing = unit(0.1, "lines"), axis.title= element_blank(),
        axis.text.x=element_text(colour="black", size=4, face="bold",angle = 90, hjust=1),
        axis.text.y=element_text(colour="black", size=15, face="bold"))
p3

#ggsave("pdf2/Fig.S7c.corrHeatmap.env.Cellular.community.2023.01.05.pdf", width = 20, height = 6)
#Supplementary Fig. 14
library(ggpubr)
ggarrange(p1,p2,p3,
          ncol = 1,nrow = 3,widths=c(1,1),heights=c(1,1),common.legend = TRUE,legend = "right",
          labels = c("a","b","c"))

#ggsave("pdf/Supplementary.Fig.13.pdf",plot = last_plot(),units = "in",height = 18,width = 20,dpi = 600)
#Supplementary Fig. 15
ko.ID.sub<-read_excel("data/KEGG.123.2022.12.09.xlsx",sheet = "r.k_4")
ko.ID.sub <- merge(ko.ID.sub, ko.mgm.ID0, by.x = "KO", by.y = "Category", all = F)
ko.mgm1$KO <- row.names(ko.mgm1)
ko.mgm.sub0 <- merge(ko.ID.sub,ko.mgm1, by = "KO", all = T )
ko.mgm.sub0 <- ko.mgm.sub0[!is.na(ko.mgm.sub0$Level1),]
ko.mgm.sub0 <- ko.mgm.sub0[ order(ko.mgm.sub0$No),]
row.names(ko.mgm.sub0) <- paste0(ko.mgm.sub0$KO,"_", ko.mgm.sub0$No, "_", ko.mgm.sub0$Level1, "_", ko.mgm.sub0$Level2 , "_", ko.mgm.sub0$Level3)

ko.sel <-ko.mgm.sub0[, c(-1:-6)]
ko.sel.ID <- ko.mgm.sub0[, c(1:6)]
colnames(ko.sel) == env.amp1$sample_name
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE
env.tmp1<-env.amp1[,c("latitude","longitude","altitude","MAP","MAT",
                      "TC","TN","TP","pH","ACa","AMg","AFe","AK","C_N","C_P", "N_P", "soil_D",
                      "AllP_abu","AllP_bsa","AllP_rich")]

env.tmp2 <- data.frame(t(ko.sel))

spman.d12 = corr.test(env.tmp1, env.tmp2,use="pairwise",method="spearman",adjust="fdr",alpha=.05,ci=FALSE)


r<-data.frame(spman.d12$r)
r[is.na(r)] <-0
r$X<-row.names(r)

r.long <- gather(r, Y, r, 1 : ncol(r)-1,  factor_key=TRUE)
p<-data.frame(spman.d12$p)
p$X<-row.names(p)
p.long <- gather(p, Y, p, 1 : ncol(p)-1, factor_key=TRUE)
cor.out<-cbind(r.long,p.long$p)
cor.out$r <- round(as.numeric(cor.out$r), 2)
str(cor.out$r)
##  num [1:61700] 0.56 0.51 0.34 -0.21 -0.5 0.35 0.09 0.06 -0.23 0.11 ...
cor.out$X<- factor(cor.out$X, 
                   levels = c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe",
                              "AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich"))
cor.out$X<- factor(cor.out$X, 
                   labels = c("Latitude","Longitude","Altitude","Mean annual precipitation","Mean annual temperature",
                              "Total Carbon","Soil total Nitrogen","Soil total Phosphorus","Soil pH","Soil available Calcium",
                              "Soil available Magnesium", "Soil available Ferrum", "Soil available Potassium", "Carbon : Nitrogen ratio", "Carbon : Phosphorus ratio", "Nitrogen: Phosphorus ratio", 
                              "Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness"))

library(splitstackshape)

cor.out<-cSplit(cor.out, "Y", "_")

cor.out$Y_4 <- factor(cor.out$Y_4, levels = c("Bacterial.secretion.system","Cell.motility","Xenobiotics.biodegradation.and.metabolism","Signal.transduction","Metabolism.of.terpenoids.and.polyketides","Glycan.biosynthesis.and.metabolism","Porphyrin.metabolism",
                                              "Energy.metabolism","Membrane.transport","Citrate.cycle","Glyoxylate.and.dicarboxylate.metabolism","Metabolism.of.other.amino.acids"))

cor.out$Y_4 <- factor(cor.out$Y_4,labels = c("BSS","CM","XenoBDM","Two component system","MTP","GBM","PM",
                                             "Energy metabolism","Membrane transport","CC","GDM","MAC"))

cor.out.sub<-cor.out[cor.out$Y_4=="MTP",]
levels(droplevels(factor(cor.out.sub$Y_5)))
##  [1] "Biosynthesis.of.12.14.and.16.membered.macrolides"            
##  [2] "Biosynthesis.of.ansamycins."                                 
##  [3] "Biosynthesis.of.enediyne.antibiotics."                       
##  [4] "Biosynthesis.of.siderophore.group.nonribosomal.peptides."    
##  [5] "Biosynthesis.of.type.II.polyketide.backbone."                
##  [6] "Biosynthesis.of.type.II.polyketide.products."                
##  [7] "Biosynthesis.of.vancomycin.group.antibiotics."               
##  [8] "Carotenoid.biosynthesis."                                    
##  [9] "Diterpenoid.biosynthesis.Including.Gibberellin.biosynthesis."
## [10] "Geraniol.degradation."                                       
## [11] "Insect.hormone.biosynthesis."                                
## [12] "Limonene.and.pinene.degradation."                            
## [13] "Monoterpenoid.biosynthesis."                                 
## [14] "Nonribosomal.peptide.structures."                            
## [15] "Polyketide.sugar.unit.biosynthesis."                         
## [16] "Sesquiterpenoid.and.triterpenoid.biosynthesis."              
## [17] "Terpenoid.backbone.biosynthesis."                            
## [18] "Tetracycline.biosynthesis."                                  
## [19] "Type.I.polyketide.structures."                               
## [20] "Zeatin.biosynthesis."
cor.out.sub$Y_5 <- factor(cor.out.sub$Y_5, levels = c( "Biosynthesis.of.12.14.and.16.membered.macrolides", "Biosynthesis.of.ansamycins.", "Biosynthesis.of.enediyne.antibiotics.", 
                                                       "Biosynthesis.of.siderophore.group.nonribosomal.peptides.", "Biosynthesis.of.type.II.polyketide.backbone.", "Biosynthesis.of.type.II.polyketide.products.", 
                                                       "Biosynthesis.of.vancomycin.group.antibiotics.", "Carotenoid.biosynthesis.", "Diterpenoid.biosynthesis.Including.Gibberellin.biosynthesis.", 
                                                       "Geraniol.degradation.", "Insect.hormone.biosynthesis.", "Limonene.and.pinene.degradation.",  
                                                       "Monoterpenoid.biosynthesis.", "Nonribosomal.peptide.structures.", 
                                                       "Polyketide.sugar.unit.biosynthesis.","Sesquiterpenoid.and.triterpenoid.biosynthesis.","Terpenoid.backbone.biosynthesis.", "Tetracycline.biosynthesis.", "Type.I.polyketide.structures.", 
                                                        "Zeatin.biosynthesis."))

cor.out.sub$Y_5 <- factor(cor.out.sub$Y_5, labels = c( "BMM", "Biosynthesis.of.ansamycins", "Biosynthesis.of.enediyne.antibiotics", 
                                                       "Biosynthesis.of.siderophore", "BPB", "Biosyn.polyketide", 
                                                       "Biosyn.vancomycin", "Carotenoid.biosynthesis", "D","Geraniol.D", 
                                                       "IH", "LPD", "M", "Nonribosomal.P","Polyketide.sugar.unit.biosynthesis", 
                                                       "STB", "Terpenoid.backbone.biosynthesis", 
                                                       "TB", "Type.I.polyketide.structures", "ZB"))

p1<-ggplot(cor.out.sub, aes(Y_1,X)) +
  facet_grid(.~ Y_5,  scales = "free", space = "free" )+
  geom_tile(aes(fill = r), size=0)+
  scale_fill_gradient2(guide = "legend", high='#2c7bb6',mid="white", low='#d7191c',name="rho")+
  geom_hline(yintercept = c(8.5,9.5), color = "orange")+
  theme(strip.text = element_text(size = 10,face="bold", color = c("white")),
        strip.background = element_rect(fill = "red"),
        panel.spacing = unit(0.1, "lines"), axis.title= element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        axis.text.y=element_text(colour="black", size=18, face="bold")) +
  labs(caption="BBM: Biosynthesis.of.12.14.and.16.membered.macrolides;Biosynthesis.of.siderophore: Biosynthesis.of.siderophore.group.nonribosomal.peptides; BPB: Biosynthesis.of.type.II.polyketide.backbone; Biosyn.polyketide: Biosynthesis.of.type.II.polyketide.products; Biosyn.vancomycin: Biosynthesis.of.vancomycin.group.antibiotics; 
  D: Diterpenoid.biosynthesis.Including.Gibberellin.biosynthesis; Geraniol.D: Geraniol.degradation; IH: Insect.hormone.biosynthesis; LPD: Limonene.and.pinene.degradation; M: Monoterpenoid.biosynthesis; Nonribosomal.P: Nonribosomal.peptide.structures; TB: Terpenoid.backbone.biosynthesis; ZB: Zeatin.biosynthesis" )
p1

#ggsave("pdf2/Fig.S8a.corrHeatmap.env.Metabolism.Terpenoid.2023.01.05.pdf", width = 30, height = 7)

ko.ID.sub<-read_excel("data/KEGG.123.2022.12.09.xlsx",sheet = "r.k_4")
ko.ID.sub <- merge(ko.ID.sub, ko.mgm.ID0, by.x = "KO", by.y = "Category", all = F)
ko.mgm1$KO <- row.names(ko.mgm1)
ko.mgm.sub0 <- merge(ko.ID.sub,ko.mgm1, by = "KO", all = T )
ko.mgm.sub0 <- ko.mgm.sub0[!is.na(ko.mgm.sub0$Level1),]
ko.mgm.sub0 <- ko.mgm.sub0[ order(ko.mgm.sub0$No),]
row.names(ko.mgm.sub0) <- paste0(ko.mgm.sub0$KO,"_", ko.mgm.sub0$No, "_", ko.mgm.sub0$Level1, "_", ko.mgm.sub0$Level2 , "_", ko.mgm.sub0$Level3)

ko.sel <-ko.mgm.sub0[, c(-1:-6)]
ko.sel.ID <- ko.mgm.sub0[, c(1:6)]
colnames(ko.sel) == env.amp1$sample_name
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE
env.tmp1<-env.amp1[,c("latitude","longitude","altitude","MAP","MAT",
                      "TC","TN","TP","pH","ACa","AMg","AFe","AK","C_N","C_P", "N_P", "soil_D",
                      "AllP_abu","AllP_bsa","AllP_rich")]

env.tmp2 <- data.frame(t(ko.sel))

spman.d12 = corr.test(env.tmp1, env.tmp2,use="pairwise",method="spearman",adjust="fdr",alpha=.05,ci=FALSE)


r<-data.frame(spman.d12$r)
r[is.na(r)] <-0
r$X<-row.names(r)

r.long <- gather(r, Y, r, 1 : ncol(r)-1,  factor_key=TRUE)
p<-data.frame(spman.d12$p)
p$X<-row.names(p)
p.long <- gather(p, Y, p, 1 : ncol(p)-1, factor_key=TRUE)
cor.out<-cbind(r.long,p.long$p)
cor.out$r <- round(as.numeric(cor.out$r), 2)
str(cor.out$r)
##  num [1:61700] 0.56 0.51 0.34 -0.21 -0.5 0.35 0.09 0.06 -0.23 0.11 ...
cor.out$X<- factor(cor.out$X, 
                   levels = c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe",
                              "AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich"))
cor.out$X<- factor(cor.out$X, 
                   labels = c("Latitude","Longitude","Altitude","Mean annual precipitation","Mean annual temperature",
                              "Total Carbon","Soil total Nitrogen","Soil total Phosphorus","Soil pH","Soil available Calcium",
                              "Soil available Magnesium", "Soil available Ferrum", "Soil available Potassium", "Carbon : Nitrogen ratio", "Carbon : Phosphorus ratio", "Nitrogen: Phosphorus ratio", 
                              "Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness"))
library(splitstackshape)

cor.out<-cSplit(cor.out, "Y", "_")

cor.out$Y_4 <- factor(cor.out$Y_4, levels = c("Bacterial.secretion.system","Cell.motility","Xenobiotics.biodegradation.and.metabolism","Signal.transduction","Metabolism.of.terpenoids.and.polyketides","Glycan.biosynthesis.and.metabolism","Porphyrin.metabolism",
                                              "Energy.metabolism","Membrane.transport","Citrate.cycle","Glyoxylate.and.dicarboxylate.metabolism","Metabolism.of.other.amino.acids"))

cor.out$Y_4 <- factor(cor.out$Y_4,labels = c("BSS","CM","XenoBDM","Two component system","MTP","GBM","PM",
                                             "Energy metabolism","Membrane transport","CC","GDM","MAC"))

cor.out.sub<-cor.out[cor.out$Y_4=="XenoBDM",]
levels(droplevels(factor(cor.out.sub$Y_5)))
##  [1] "Aminobenzoate.degradation"                      
##  [2] "Atrazine.degradation"                           
##  [3] "Benzoate.degradation"                           
##  [4] "Bisphenol.degradation"                          
##  [5] "Caprolactam.degradation"                        
##  [6] "Chloroalkane.and.chloroalkene.degradation"      
##  [7] "Chlorocyclohexane.and.chlorobenzene.degradation"
##  [8] "Dioxin.degradation"                             
##  [9] "Drug.metabolism.cytochrome.P450"                
## [10] "Drug.metabolism.other.enzymes"                  
## [11] "Ethylbenzene.degradation"                       
## [12] "Fluorobenzoate.degradation"                     
## [13] "Furfural.degradation"                           
## [14] "Metabolism.of.xenobiotics.by.cytochrome.P450"   
## [15] "Naphthalene.degradation"                        
## [16] "Nitrotoluene.degradation"                       
## [17] "Polycyclic.aromatic.hydrocarbon.degradation"    
## [18] "Steroid.degradation"                            
## [19] "Styrene.degradation"                            
## [20] "Toluene.degradation"                            
## [21] "Xylene.degradation"
cor.out.sub$Y_5 <- factor(cor.out.sub$Y_5, labels = c( "Aminobenzoate.degradation", "Atrazine.degradation", "Benzoate.degradation", 
                                                       "Bisphenol.degradation", "Caprolactam.degradation", "Chloroalkane.and.chloroalkene.degradation", 
                                                       "Chlorocyclohexane.and.chlorobenzene.degradation", "Dioxin.degradation", "Drug.metabolism.cytochrome.P450", 
                                                       "Drug.metabolism.other.enzymes", "Ethylbenzene.degradation", "Fluorobenzoate.degradation", 
                                                       "Furfural.degradation", "Metabolism.of.xenobiotics.by.cytochrome.P450",  
                                                       "Naphthalene.degradation", "Nitrotoluene.degradation", "Polycyclic.aromatic.hydrocarbon.degradation", 
                                                       "Steroid.degradation", "Styrene.degradation", "Toluene.degradation", 
                                                       "Xylene.degradation"))

cor.out.sub$Y_5 <- factor(cor.out.sub$Y_5, labels = c( "Aminobenzoate.degradation", "AtrD", "Benzoate.degradation", 
                                                       "B", "CapD", "ChloroalkaneD", 
                                                       "ChlcbD", "DioxinD", "Dc", 
                                                       "DrugO", "EbD", "FbD", 
                                                       "FfD", "Mxc", "NaD", 
                                                       "NiD", "PcahD", "SteroidD", 
                                                       "StyreneD", "TolueneD", "XyleneD"))

p2<-ggplot(cor.out.sub, aes(Y_1,X)) +
  facet_grid(.~ Y_5,  scales = "free", space = "free" )+
  geom_tile(aes(fill = r), size=0)+
  scale_fill_gradient2(guide = "legend", high='#2c7bb6',mid="white", low='#d7191c',name="rho")+
  geom_hline(yintercept = c(8.5,9.5), color = "orange")+
  theme(strip.text = element_text(size = 10,face="bold", color = c("white")),
        strip.background = element_rect(fill = "red"),
        panel.spacing = unit(0.1, "lines"), axis.title= element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        axis.text.y=element_text(colour="black", size=18, face="bold")) +
  labs(caption="AtrD: Atrazine.degradation; B: Bisphenol.degradation; CapD: Caprolactam.degradation; ChlcbD: Chloroalkane.and.chloroalkene.degradation; ChlorocyclohexaneD: Chlorocyclohexane.and.chlorobenzene.degradation; DioxinD: Dioxin.degradation; Dc: Drug.metabolism.cytochrome.P450.; DrugO: Drug.metabolism.other.enzymes; EbD: Ethylbenzene.degradation; FbD: Fluorobenzoate.degradation; FfD: Furfural.degradation;
       Mxc: Metabolism.of.xenobiotics.by.cytochrome; NaD: Naphthalene.degradation; NiD: Nitrotoluene.degradation; PcahD: Polycyclic.aromatic.hydrocarbon.degradation; SD: Steroid.degradation; StyreneD: Styrene.degradation; TolueneD: Toluene.degradation; XyleneD: Xylene.degradation" )

p2

#ggsave("pdf2/Fig.S8b.corrHeatmap.env.XenoBDM.2023.01.05.pdf", width = 30, height = 7)

env.tmp1<-env.amp1[,c("latitude","longitude","altitude","MAP","MAT",
                      "TC","TN","TP","pH","ACa","AMg","AFe","AK","C_N","C_P", "N_P", "soil_D",
                      "AllP_abu","AllP_bsa","AllP_rich")]

env.tmp2 <- data.frame(t(rgi.mgm1))

spman.d12 = corr.test(env.tmp1, env.tmp2,use="pairwise",method="spearman",adjust="fdr",alpha=.05,ci=FALSE)


r<-data.frame(spman.d12$r)
r[is.na(r)] <-0
r$X<-row.names(r)

r.long <- gather(r, Y, r, 1 : ncol(r)-1,  factor_key=TRUE)
p<-data.frame(spman.d12$p)
p$X<-row.names(p)
p.long <- gather(p, Y, p, 1 : ncol(p)-1, factor_key=TRUE)
cor.out<-cbind(r.long,p.long$p)
cor.out$r <- round(as.numeric(cor.out$r), 2)
str(cor.out$r)
##  num [1:21120] -0.38 -0.28 -0.28 0.17 0.48 -0.23 -0.1 -0.19 -0.32 -0.35 ...
cor.out$X<- factor(cor.out$X, 
                   levels = c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe",
                              "AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich"))
cor.out$X<- factor(cor.out$X, 
                   labels = c("Latitude","Longitude","Altitude","Mean annual precipitation","Mean annual temperature",
                              "Total Carbon","Soil total Nitrogen","Soil total Phosphorus","Soil pH","Soil available Calcium",
                              "Soil available Magnesium", "Soil available Ferrum", "Soil available Potassium", "Carbon : Nitrogen ratio", "Carbon : Phosphorus ratio", "Nitrogen: Phosphorus ratio", 
                              "Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness"))
library(splitstackshape)

cor.out.sig <- cor.out
cor.out.sig$r[cor.out.sig$p > 0.01] <- 0
cor.out.sig$Y_2<-"Antibiotic resistance genes (ARG)"

p3<-ggplot(cor.out.sig, aes(Y,X)) +
  facet_grid(.~ Y_2,  scales = "free", space = "free" )+
  geom_tile(aes(fill = r), size=0)+
  scale_fill_gradient2(guide = "legend", high='#2c7bb6',mid="white", low='#d7191c',name="rho")+
  geom_hline(yintercept = c(8.5,9.5), color = "orange")+
  theme(strip.text = element_text(size = 10,face="bold", color = c("white")),
        strip.background = element_rect(fill = "red"),
        panel.spacing = unit(0.1, "lines"),
        axis.title= element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        axis.text.y=element_text(colour="black", size=18, face="bold"))
p3

#ggsave("pdf2/Fig.S8c.corrHeatmap.env.ARG.genes.2023.01.09.pdf", width = 30, height = 7)
#Supplementary Fig. 15
library(ggpubr)
ggarrange(p1,p2,p3,
          ncol = 1,nrow = 3,widths=c(1,1),heights=c(1,1),common.legend = TRUE,legend = "right",
          labels = c("a","b","c"))

#ggsave("pdf/Supplementary.Fig.14.pdf",plot = last_plot(),units = "in",height = 21,width = 30,dpi = 600)
#Supplementary Fig. 16
ko.ID.sub<-read_excel("data/KEGG.123.2022.12.09.xlsx",sheet = "r.k_4")
ko.ID.sub <- merge(ko.ID.sub, ko.mgm.ID0, by.x = "KO", by.y = "Category", all = F)
ko.mgm1$KO <- row.names(ko.mgm1)
ko.mgm.sub0 <- merge(ko.ID.sub,ko.mgm1, by = "KO", all = T )
ko.mgm.sub0 <- ko.mgm.sub0[!is.na(ko.mgm.sub0$Level1),]
ko.mgm.sub0 <- ko.mgm.sub0[ order(ko.mgm.sub0$No),]
row.names(ko.mgm.sub0) <- paste0(ko.mgm.sub0$KO,"_", ko.mgm.sub0$No, "_", ko.mgm.sub0$Level1, "_", ko.mgm.sub0$Level2 , "_", ko.mgm.sub0$Level3)

ko.sel <-ko.mgm.sub0[, c(-1:-6)]
ko.sel.ID <- ko.mgm.sub0[, c(1:6)]
colnames(ko.sel) == env.amp1$sample_name
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE
env.tmp1<-env.amp1[,c("latitude","longitude","altitude","MAP","MAT",
                      "TC","TN","TP","pH","ACa","AMg","AFe","AK","C_N","C_P", "N_P", "soil_D",
                      "AllP_abu","AllP_bsa","AllP_rich")]

env.tmp2 <- data.frame(t(ko.sel))

spman.d12 = corr.test(env.tmp1, env.tmp2,use="pairwise",method="spearman",adjust="fdr",alpha=.05,ci=FALSE)


r<-data.frame(spman.d12$r)
r[is.na(r)] <-0
r$X<-row.names(r)

r.long <- gather(r, Y, r, 1 : ncol(r)-1,  factor_key=TRUE)
p<-data.frame(spman.d12$p)
p$X<-row.names(p)
p.long <- gather(p, Y, p, 1 : ncol(p)-1, factor_key=TRUE)
cor.out<-cbind(r.long,p.long$p)
cor.out$r <- round(as.numeric(cor.out$r), 2)
str(cor.out$r)
##  num [1:61700] 0.56 0.51 0.34 -0.21 -0.5 0.35 0.09 0.06 -0.23 0.11 ...
cor.out$X<- factor(cor.out$X, 
                   levels = c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe",
                              "AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich"))
cor.out$X<- factor(cor.out$X, 
                   labels = c("Latitude","Longitude","Altitude","Mean annual precipitation","Mean annual temperature",
                              "Total Carbon","Soil total Nitrogen","Soil total Phosphorus","Soil pH","Soil available Calcium",
                              "Soil available Magnesium", "Soil available Ferrum", "Soil available Potassium", "Carbon : Nitrogen ratio", "Carbon : Phosphorus ratio", "Nitrogen: Phosphorus ratio", 
                              "Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness"))
library(splitstackshape)

cor.out<-cSplit(cor.out, "Y", "_")

cor.out$Y_4 <- factor(cor.out$Y_4, levels = c("Bacterial.secretion.system","Cell.motility","Xenobiotics.biodegradation.and.metabolism","Signal.transduction","Metabolism.of.terpenoids.and.polyketides","Glycan.biosynthesis.and.metabolism","Porphyrin.metabolism",
                                              "Energy.metabolism","Membrane.transport","Citrate.cycle","Glyoxylate.and.dicarboxylate.metabolism","Metabolism.of.other.amino.acids"))

cor.out$Y_4 <- factor(cor.out$Y_4,labels = c("BSS","CM","XenoBDM","Two component system","MTP","GBM","PM",
                                             "Energy metabolism","Membrane transport","CC","GDM","MAC"))

cor.out.sub<-cor.out[cor.out$Y_4=="Energy metabolism",]
levels(droplevels(factor(cor.out.sub$Y_5)))
## [1] "Carbon.fixation.pathways.in.prokaryotes"
## [2] "Methane.metabolism"                     
## [3] "Nitrogen.metabolism"                    
## [4] "Oxidative.phosphorylation"              
## [5] "Sulfur.metabolism"
cor.out.sub$Y_5 <- factor(cor.out.sub$Y_5, labels = c("Carbon.fixation", "Methane.metabolism",  "Nitrogen.M",  "Oxidative.phosphorylation",   "Sulfur.metabolism" ))

p1<-ggplot(cor.out.sub, aes(Y_1,X)) +
  facet_grid(.~ Y_5,  scales = "free", space = "free" )+
  geom_tile(aes(fill = r), size=0)+
  scale_fill_gradient2(guide = "legend", high='#2c7bb6',mid="white", low='#d7191c',name="rho")+
  geom_hline(yintercept = c(8.5,9.5), color = "orange")+
  theme(strip.text = element_text(size = 10,face="bold", color = c("white")),
        strip.background = element_rect(fill = "blue"),
        panel.spacing = unit(0.1, "lines"), axis.title= element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        axis.text.y=element_text(colour="black", size=8, face="bold")) +
  labs(caption="Nitrogen.M: Nitrogen metabolism")
p1

#ggsave("pdf2/Fig.S9a.corrHeatmap.env.Energy.metabolism.2023.01.05.pdf", width = 10, height = 3)

ko.ID.sub<-read_excel("data/KEGG.123.2022.12.09.xlsx",sheet = "r.k_4")
ko.ID.sub <- merge(ko.ID.sub, ko.mgm.ID0, by.x = "KO", by.y = "Category", all = F)
ko.mgm1$KO <- row.names(ko.mgm1)
ko.mgm.sub0 <- merge(ko.ID.sub,ko.mgm1, by = "KO", all = T )
ko.mgm.sub0 <- ko.mgm.sub0[!is.na(ko.mgm.sub0$Level1),]
ko.mgm.sub0 <- ko.mgm.sub0[ order(ko.mgm.sub0$No),]
row.names(ko.mgm.sub0) <- paste0(ko.mgm.sub0$KO,"_", ko.mgm.sub0$No, "_", ko.mgm.sub0$Level1, "_", ko.mgm.sub0$Level2 , "_", ko.mgm.sub0$Level3)

ko.sel <-ko.mgm.sub0[, c(-1:-6)]
ko.sel.ID <- ko.mgm.sub0[, c(1:6)]
colnames(ko.sel) == env.amp1$sample_name
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE
env.tmp1<-env.amp1[,c("latitude","longitude","altitude","MAP","MAT",
                      "TC","TN","TP","pH","ACa","AMg","AFe","AK","C_N","C_P", "N_P", "soil_D",
                      "AllP_abu","AllP_bsa","AllP_rich")]

env.tmp2 <- data.frame(t(ko.sel))

spman.d12 = corr.test(env.tmp1, env.tmp2,use="pairwise",method="spearman",adjust="fdr",alpha=.05,ci=FALSE)


r<-data.frame(spman.d12$r)
r[is.na(r)] <-0
r$X<-row.names(r)

r.long <- gather(r, Y, r, 1 : ncol(r)-1,  factor_key=TRUE)
p<-data.frame(spman.d12$p)
p$X<-row.names(p)
p.long <- gather(p, Y, p, 1 : ncol(p)-1, factor_key=TRUE)
cor.out<-cbind(r.long,p.long$p)
cor.out$r <- round(as.numeric(cor.out$r), 2)
str(cor.out$r)
##  num [1:61700] 0.56 0.51 0.34 -0.21 -0.5 0.35 0.09 0.06 -0.23 0.11 ...
cor.out$X<- factor(cor.out$X, 
                   levels = c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe",
                              "AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich"))

cor.out$X<- factor(cor.out$X, 
                   labels = c("Latitude","Longitude","Altitude","Mean annual precipitation","Mean annual temperature",
                              "Total Carbon","Soil total Nitrogen","Soil total Phosphorus","Soil pH","Soil available Calcium",
                              "Soil available Magnesium", "Soil available Ferrum", "Soil available Potassium", "Carbon : Nitrogen ratio", "Carbon : Phosphorus ratio", "Nitrogen: Phosphorus ratio", 
                              "Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness"))

library(splitstackshape)

cor.out<-cSplit(cor.out, "Y", "_")

cor.out$Y_4 <- factor(cor.out$Y_4, levels = c("Bacterial.secretion.system","Cell.motility","Xenobiotics.biodegradation.and.metabolism","Signal.transduction","Metabolism.of.terpenoids.and.polyketides","Glycan.biosynthesis.and.metabolism","Porphyrin.metabolism",
                                              "Energy.metabolism","Membrane.transport","Citrate.cycle","Glyoxylate.and.dicarboxylate.metabolism","Metabolism.of.other.amino.acids"))

cor.out$Y_4 <- factor(cor.out$Y_4,labels = c("BSS","CM","XenoBDM","Two component system","MTP","GBM","PM",
                                             "Energy metabolism","Membrane transport","CC","GDM","MAC"))

cor.out.sub<-cor.out[cor.out$Y_4=="Membrane transport",]
levels(droplevels(factor(cor.out.sub$Y_5)))
## [1] "ABC.transporters"          "Phosphotransferase.system"
cor.out.sub$Y_5 <- factor(cor.out.sub$Y_5, labels = c("ABC.transporters" , "PS" ))

p2<-ggplot(cor.out.sub, aes(Y_1,X)) +
  facet_grid(.~ Y_5,  scales = "free", space = "free" )+
  geom_tile(aes(fill = r), size=0)+
  scale_fill_gradient2(guide = "legend", high='#2c7bb6',mid="white", low='#d7191c',name="rho")+
  geom_hline(yintercept = c(8.5,9.5), color = "orange")+
  theme(strip.text = element_text(size = 10,face="bold", color = c("white")),
        strip.background = element_rect(fill = "blue"),
        panel.spacing = unit(0.1, "lines"), axis.title= element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        axis.text.y=element_text(colour="black", size=8, face="bold")) +
  labs(caption="PS: Phosphotransferase.system")
p2

#ggsave("pdf2/Fig.S9b.corrHeatmap.env.Membrane.transport.2023.01.05.pdf", width = 10, height = 3)
#Supplementary Fig. 16
library(ggpubr)
ggarrange(p1,p2,
          ncol = 1,nrow = 2,widths=c(1,1),heights=c(1,1),common.legend = TRUE,legend = "right",
          labels = c("a","b"))

#ggsave("pdf/Supplementary.Fig.15.pdf",plot = last_plot(),units = "in",height = 6,width = 10,dpi = 600)
#Supplementary Fig. 17
rm(list = ls())
load("data/CForBio.data.prep.2022.11.22.RData")

library(vegan)
cutoff <- 1000

sort(colSums(bac.amp1))
##  BD1623  DL1904    NG14  XS0601 BTM1515  GT0910     NG7  BD1512  TT1419  CB2424 
##   24903   26972   27053   27234   29143   31098   33452   34090   34339   34770 
##  GT2515  TT0302 DHS0824  HS1433  HS1531  TT1201  HS2325 BTM2317  GT0206 DHS0911 
##   34826   35210   35306   35369   35611   35751   36395   36829   36923   37867 
##    LS11 DHS0808 BTM1323     NG8  CB2009  XS0809  DL0810     LS9  DL0906  XS2022 
##   39954   41573   42171   42958   43395   45875   45977   46174   46676   47364 
##     GH1     GH4  BD1203    LS15  CB0122     GH2 
##   48535   54597   55685   60392   65051   65829
sort(colSums(bac.mgm1))
##   HS1433   BD1623      NG7   XS2022  DHS0824  DHS0808   DL1904  BTM1323 
##  9115574  9597402  9667544  9737388  9875341 10013817 10016208 10110020 
##   TT1419   HS2325   GT0910   DL0810   GT2515   GT0206   TT0302      NG8 
## 10160188 10164701 10195067 10301253 10395324 10534807 10643744 10648853 
##   XS0809   HS1531     LS11   CB2424  BTM2317   XS0601      GH1   DL0906 
## 10675545 10684100 10970718 11115704 11143274 11320447 11461977 11544870 
##   BD1512      GH2     NG14  DHS0911   CB0122   CB2009   BD1203   TT1201 
## 11738899 11750163 11847992 11987853 12237915 12559810 12632900 12911304 
##      LS9     LS15      GH4  BTM1515 
## 13903753 14521553 14922282 15939544
sort(colSums(ko.mgm1))
##   HS1433   HS1531   BD1512   HS2325  DHS0911   BD1203   CB0122   XS2022 
## 516612.0 554923.2 561838.9 564412.5 570647.8 572166.7 578939.1 581887.7 
##   BD1623   TT1419      GH4   XS0809   CB2009   TT0302  DHS0824   GT0206 
## 585609.5 588950.4 590928.8 591823.2 595951.2 597472.6 597523.9 605316.2 
##   GT0910  DHS0808   XS0601   TT1201      LS9     LS15      NG8   GT2515 
## 607017.0 611526.6 612949.0 618809.8 618974.8 621618.7 623901.4 624726.0 
##     NG14      NG7  BTM1323  BTM1515     LS11  BTM2317   CB2424      GH2 
## 626956.5 630290.3 640564.2 640670.3 653233.2 657988.6 661383.3 664885.4 
##      GH1   DL0906   DL0810   DL1904 
## 670864.8 682216.2 709302.7 712869.3
sort(colSums(cog.mgm1))
##   HS1433   BD1512   CB0122  DHS0911   HS1531   BD1623   HS2325      GH4 
## 665137.6 700954.6 701771.9 706113.6 707706.2 717680.3 719426.8 719907.9 
##   BD1203   TT1419   XS2022   CB2009   XS0809      NG8   TT0302      LS9 
## 720810.2 735415.6 736115.9 736728.3 737294.4 738141.8 741127.0 741129.8 
##  DHS0824     NG14   GT0206      NG7     LS15   GT0910  DHS0808   XS0601 
## 742211.5 745558.0 745871.5 746178.6 747159.8 748678.5 749599.8 753834.3 
##  BTM2317  BTM1323  BTM1515   TT1201     LS11   GT2515   CB2424   DL0906 
## 761668.5 762288.9 766129.1 769869.2 772483.2 780307.3 785374.0 785610.6 
##      GH2      GH1   DL1904   DL0810 
## 806012.5 810165.7 822934.2 825360.4
sort(colSums(rgi.mgm1))
##      GH4   CB0122   CB2009      NG8   BD1623      NG7     NG14   DL0906 
## 20995.21 23022.58 24293.34 24840.47 25081.28 25099.30 25294.91 25324.26 
##   BD1512     LS15   HS1433      LS9   TT1419     LS11   BD1203  BTM2317 
## 25426.07 25842.94 25856.23 25965.01 26532.75 26701.09 26808.88 27092.64 
##  DHS0911   TT0302   XS0601   XS0809   CB2424   XS2022   GT0206  BTM1515 
## 27105.60 27138.77 27223.78 27593.08 27629.52 27753.26 27775.72 27778.20 
##   GT0910   TT1201      GH2   DL0810   DL1904   HS2325  DHS0824  DHS0808 
## 28043.34 28054.02 28314.02 28403.45 28474.85 28496.89 28701.60 28718.88 
##      GH1   HS1531  BTM1323   GT2515 
## 28730.37 29012.93 29564.26 30208.16
sort(colSums(cazy.mgm1))
##   BD1623      GH4   BD1512      NG8      NG7     NG14   CB0122   XS0601 
## 13535.55 13791.28 14072.59 14476.19 14543.41 14716.72 14718.56 14858.62 
##   CB2009   BD1203   GT0910   GT0206   DL0906   CB2424      LS9     LS15 
## 15158.24 15372.90 15421.68 15423.15 15490.17 15512.52 15621.44 15661.32 
##     LS11  BTM2317   XS0809   HS1433   HS2325   XS2022   DL1904   DL0810 
## 15742.96 15933.04 16022.71 16071.16 16630.05 16715.92 16996.28 17006.50 
##      GH2   GT2515  DHS0911      GH1   HS1531  BTM1515   TT1419  DHS0824 
## 17012.91 17096.55 17106.44 17144.00 17227.44 17361.87 17600.44 17729.42 
##  BTM1323   TT0302   TT1201  DHS0808 
## 17929.15 18253.39 18461.59 18792.74
bac.amp <- rrarefy(t(bac.amp1),sample = 24903 )
bac.mgm <- data.frame(t(bac.mgm1))

ko.mgm <- data.frame(t(ko.mgm1))
cog.mgm <- data.frame(t(cog.mgm1))
rgi.mgm <- data.frame(t(rgi.mgm1))
cazy.mgm <- data.frame(t(cazy.mgm1))

env.amp1$rgi.mgm.H <- vegan::diversity(rgi.mgm)
env.amp1$cazy.mgm.H <- vegan::diversity(cazy.mgm)
env.amp1$ko.mgm.H <- vegan::diversity(ko.mgm)
env.amp1$cog.mgm.H <- vegan::diversity(cog.mgm)
env.amp1$bac.mgm.H <- vegan::diversity(bac.mgm)
env.amp1$bac.amp.H <- vegan::diversity(bac.amp)

env.amp1$rgi.mgm.S <- vegan::specnumber(rgi.mgm)
env.amp1$cazy.mgm.S <- vegan::specnumber(cazy.mgm)
env.amp1$ko.mgm.S <- vegan::specnumber(ko.mgm)
env.amp1$cog.mgm.S <- vegan::specnumber(cog.mgm)
env.amp1$bac.mgm.S <- vegan::specnumber(bac.mgm)
env.amp1$bac.amp.S <- vegan::specnumber(bac.amp)

env.amp1$cog.mgm.sum <- rowSums(cog.mgm)
env.amp1$rgi.mgm.sum <- rowSums(rgi.mgm)
env.amp1$cazy.mgm.sum <- rowSums(cazy.mgm)
env.amp1$ko.mgm.sum <- rowSums(ko.mgm)
env.amp1$bac.mgm.sum <- rowSums(bac.mgm)

#func <- c("Ribosome","Aminoacyl.tRNA.biosynthesis")
#nam <- "Translation"
#hi <- 20
Heatmap.genes <- function (func, nam, hi){
  ko.ID<-read_excel("data/KEGG.123.2022.12.09.xlsx",sheet = "KEGG3")
  ko.ID.sub <- data.frame(ko.ID[ko.ID$Level3 %in%  func,])
  
  ko.mgm.sub <- ko.mgm1[row.names(ko.mgm1) %in% ko.ID.sub$KO,]
  ko.ID.sub <- ko.ID.sub[ko.ID.sub$KO %in% row.names(ko.mgm1) ,]
  ko.ID.sub <- ko.ID.sub[order(ko.ID.sub$KO),]
  row.names(ko.mgm.sub) == ko.ID.sub$KO
  ko.ID.sub$KO.id <- paste0(ko.ID.sub$Level3, "_",ko.ID.sub$KO)
  row.names(ko.mgm.sub) <- ko.ID.sub$KO.id
  
  ko.mgm.sub <- ko.mgm.sub[order(ko.ID.sub$No),]
  env.raw1<-env.amp1
  rownames(env.raw1)<-env.raw1$sample_name
  env.sel<-env.raw1[row.names(env.raw1) %in% colnames(ko.mgm.sub),]
  function.sel<-data.frame(t(ko.mgm.sub))
  row.names(env.sel) == row.names(function.sel)
  
  
  env.sel[, paste0(func, ".RA")] <- rowSums(function.sel)
  env.sel[, paste0(func, ".H")] <- vegan::diversity(function.sel)
  env.sel[, paste0(func, ".S")] <- vegan::specnumber(function.sel)
  cog.mgm <- data.frame(t(cog.mgm1))
  row.names(cog.mgm) == row.names(env.sel)
  env.sel$cog.mgm.sum <- rowSums(cog.mgm)
  env.cog <- data.frame(env.sel,cog.mgm )
  env.cog$rib.pct <- env.cog$J/env.cog$cog.mgm.sum
  env.cog$transcript.pct <- env.cog$K/env.cog$cog.mgm.sum
  env.cog$defense.pct <- env.cog$V/env.cog$cog.mgm.sum
  
  env.tmp1<-env.cog[,c("latitude","longitude","altitude","MAP","MAT",
                       "TC","TN","TP","pH","ACa","AMg","AFe","AK","C_N","C_P", "N_P", "soil_D",
                       "AllP_abu","AllP_bsa","AllP_rich")]
  
  env.tmp1<-data.frame(apply(env.tmp1,2,as.numeric))
  env.tmp2 <- function.sel
  
  spman.d12 = corr.test(env.tmp1, env.tmp2,use="pairwise",method="spearman",adjust="fdr",alpha=.05,ci=FALSE)
  
  
  r<-data.frame(spman.d12$r)
  r[is.na(r)] <-0
  r$X<-row.names(r)
  
  r.long <- gather(r, Y, r, 1 : ncol(r)-1,  factor_key=TRUE)
  p<-data.frame(spman.d12$p)
  p$X<-row.names(p)
  p.long <- gather(p, Y, p, 1 : ncol(p)-1, factor_key=TRUE)
  cor.out<-cbind(r.long,p.long$p)
  cor.out$r <- round(as.numeric(cor.out$r), 2)
  str(cor.out$r)
  
  cor.out$X<- factor(cor.out$X, 
                     levels = c("latitude","longitude","altitude","MAP","MAT","TC","TN","TP","pH","ACa","AMg","AFe",
                                "AK","C_N","C_P", "N_P", "soil_D","AllP_abu","AllP_bsa","AllP_rich"))
  
  cor.out$X<- factor(cor.out$X, 
                     labels = c("Latitude","Longitude","Altitude","Mean annual precipitation","Mean annual temperature",
                                "Total Carbon","Soil total Nitrogen","Soil total Phosphorus","Soil pH","Soil available Calcium",
                                "Soil available Magnesium", "Soil available Ferrum", "Soil available Potassium", "Carbon : Nitrogen ratio", "Carbon : Phosphorus ratio", "Nitrogen: Phosphorus ratio", 
                                "Soil bulk density", "Plant abundance", "Plant basal area", "Plant richness"))
  library(splitstackshape)
  #cor.out$Y 
  cor.out<-cSplit(cor.out, "Y", "_")
  cor.out$Y_1 <- factor(cor.out$Y_1, levels = func)
  
  p0<-ggplot(cor.out, aes(Y_2,X)) +
    facet_grid(cols = vars(Y_1), scales = "free", space = "free")+
    geom_tile(aes(fill = r), size=1)+
    geom_hline(yintercept = c(8.5,9.5), color = "red")+
    scale_fill_gradient(guide = "legend", high='green', low='blue',name="rho")+
    theme(axis.title= element_blank(),
          axis.text.x=element_text(colour="black", size=8, face="bold",angle = 90),
          axis.text.y=element_text(colour="black", size=12, face="bold"))
  #print(p0)
  
  cor.out.sig <-cor.out
  cor.out.sig$r[cor.out.sig$p > 0.05] <- 0
  
  p1<-ggplot(cor.out.sig, aes(Y_2,X)) + 
    facet_grid(cols = vars(Y_1), scales = "free", space = "free")+
    geom_tile(aes(fill = r), size=1)+
    geom_text(data=  cor.out.sig[cor.out.sig$r!=0,], aes(label = r), size = 1, font="bold")+scale_fill_gradient2(guide = "legend", high='#2c7bb6',mid="white", low='#d7191c',name="rho")+
   geom_hline(yintercept = c(8.5,9.5), color = "orange")+
    theme_bw()+
    theme(axis.title= element_blank(),
          strip.text = element_text(size = 12,face="bold", color = "white"),
          strip.background = element_rect(fill = "blue"),
          panel.spacing = unit(0, "lines"),
          axis.text.x=element_text(colour="black", size=8, face="bold",angle = 90,vjust = 0.5),
          axis.text.y=element_text(colour="black", size=12,face="bold"))
  
  #ggsave(paste0("pdf/Supplementary.Fig.11.Heatmap2.",nam,".2023.01.05.pdf"), width = hi, height = 6)
  print(p1)

}

Heatmap.genes(  c("Ribosome","Aminoacyl.tRNA.biosynthesis"), "Translation", 20  )
##  num [1:3280] 0.43 0.28 0.12 -0.47 -0.41 0.33 0.28 0.52 0.52 0.71 ...

R Markdown